diff options
Diffstat (limited to 'util/crossgcc/patches')
-rw-r--r-- | util/crossgcc/patches/gcc-8.3.0_ada-musl_workaround.patch (renamed from util/crossgcc/patches/gcc-9.2.0_ada-musl_workaround.patch) | 0 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-8.3.0_gnat-bad_constant.patch | 150 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-8.3.0_gnat.patch (renamed from util/crossgcc/patches/gcc-9.2.0_gnat.patch) | 0 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-8.3.0_libgcc.patch (renamed from util/crossgcc/patches/gcc-9.2.0_libgcc.patch) | 0 | ||||
-rw-r--r-- | util/crossgcc/patches/gcc-8.3.0_nds32_ite.patch | 21019 |
5 files changed, 21169 insertions, 0 deletions
diff --git a/util/crossgcc/patches/gcc-9.2.0_ada-musl_workaround.patch b/util/crossgcc/patches/gcc-8.3.0_ada-musl_workaround.patch index 1f504a4b5e..1f504a4b5e 100644 --- a/util/crossgcc/patches/gcc-9.2.0_ada-musl_workaround.patch +++ b/util/crossgcc/patches/gcc-8.3.0_ada-musl_workaround.patch diff --git a/util/crossgcc/patches/gcc-8.3.0_gnat-bad_constant.patch b/util/crossgcc/patches/gcc-8.3.0_gnat-bad_constant.patch new file mode 100644 index 0000000000..e98f933a13 --- /dev/null +++ b/util/crossgcc/patches/gcc-8.3.0_gnat-bad_constant.patch @@ -0,0 +1,150 @@ +commit b6f742f96c62bab0582021455328ae3be58e16d3 +Author: pmderodat <pmderodat@138bc75d-0d04-0410-961f-82ee72b054a4> +Date: Fri May 25 09:05:10 2018 +0000 + + [Ada] Remove "constant" attribute on Osint.Unknown_Attributes + + 2018-05-25 Arnaud Charlet <charlet@adacore.com> + + gcc/ada/ + + * exp_aggr.adb (Convert_To_Positional): Bump default for + Max_Others_Replicate to 32. Update comments. + * osint.ads (Unknown_Attributes): No longer pretend this is a constant. + (No_File_Info_Cache): Initialize separately. + * osint.adb (No_File_Info_Cache): Update initializer. + + git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@260739 138bc75d-0d04-0410-961f-82ee72b054a4 + +diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog +index e4127e472aa..d56240b7b82 100644 +--- a/gcc/ada/ChangeLog ++++ b/gcc/ada/ChangeLog +@@ -188,6 +188,14 @@ + an allocator if the type is an unconstrained record type with default + discriminant. + ++2018-05-25 Arnaud Charlet <charlet@adacore.com> ++ ++ * exp_aggr.adb (Convert_To_Positional): Bump default for ++ Max_Others_Replicate to 32. Update comments. ++ * osint.ads (Unknown_Attributes): No longer pretend this is a constant. ++ (No_File_Info_Cache): Initialize separately. ++ * osint.adb (No_File_Info_Cache): Update initializer. ++ + 2018-05-04 John Marino <gnugcc@marino.st> + + PR ada/85635 +diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb +index f723c1b4d99..ff5210eb4e4 100644 +--- a/gcc/ada/exp_aggr.adb ++++ b/gcc/ada/exp_aggr.adb +@@ -284,14 +284,14 @@ package body Exp_Aggr is + + procedure Convert_To_Positional + (N : Node_Id; +- Max_Others_Replicate : Nat := 5; ++ Max_Others_Replicate : Nat := 32; + Handle_Bit_Packed : Boolean := False); + -- If possible, convert named notation to positional notation. This + -- conversion is possible only in some static cases. If the conversion is + -- possible, then N is rewritten with the analyzed converted aggregate. + -- The parameter Max_Others_Replicate controls the maximum number of + -- values corresponding to an others choice that will be converted to +- -- positional notation (the default of 5 is the normal limit, and reflects ++ -- positional notation (the default of 32 is the normal limit, and reflects + -- the fact that normally the loop is better than a lot of separate + -- assignments). Note that this limit gets overridden in any case if + -- either of the restrictions No_Elaboration_Code or No_Implicit_Loops is +@@ -301,11 +301,6 @@ package body Exp_Aggr is + -- Packed_Array_Aggregate_Handled, we set this parameter to True, since + -- these are cases we handle in there. + +- -- It would seem useful to have a higher default for Max_Others_Replicate, +- -- but aggregates in the compiler make this impossible: the compiler +- -- bootstrap fails if Max_Others_Replicate is greater than 25. This +- -- is unexpected ??? +- + procedure Expand_Array_Aggregate (N : Node_Id); + -- This is the top-level routine to perform array aggregate expansion. + -- N is the N_Aggregate node to be expanded. +@@ -4292,7 +4287,7 @@ package body Exp_Aggr is + + procedure Convert_To_Positional + (N : Node_Id; +- Max_Others_Replicate : Nat := 5; ++ Max_Others_Replicate : Nat := 32; + Handle_Bit_Packed : Boolean := False) + is + Typ : constant Entity_Id := Etype (N); +diff --git a/gcc/ada/osint.adb b/gcc/ada/osint.adb +index 0c23761b6dc..896fbc7ee37 100644 +--- a/gcc/ada/osint.adb ++++ b/gcc/ada/osint.adb +@@ -250,8 +250,7 @@ package body Osint is + Attr : aliased File_Attributes; + end record; + +- No_File_Info_Cache : constant File_Info_Cache := +- (No_File, Unknown_Attributes); ++ No_File_Info_Cache : constant File_Info_Cache := (No_File, (others => 0)); + + package File_Name_Hash_Table is new GNAT.HTable.Simple_HTable ( + Header_Num => File_Hash_Num, +diff --git a/gcc/ada/osint.ads b/gcc/ada/osint.ads +index 65a87fe4ce3..6c75b521456 100644 +--- a/gcc/ada/osint.ads ++++ b/gcc/ada/osint.ads +@@ -255,10 +255,26 @@ package Osint is + -- from the disk and then cached in the File_Attributes parameter (possibly + -- along with other values). + +- type File_Attributes is private; +- Unknown_Attributes : constant File_Attributes; ++ File_Attributes_Size : constant Natural := 32; ++ -- This should be big enough to fit a "struct file_attributes" on any ++ -- system. It doesn't cause any malfunction if it is too big (which avoids ++ -- the need for either mapping the struct exactly or importing the sizeof ++ -- from C, which would result in dynamic code). However, it does waste ++ -- space (e.g. when a component of this type appears in a record, if it is ++ -- unnecessarily large). Note: for runtime units, use System.OS_Constants. ++ -- SIZEOF_struct_file_attributes instead, which has the exact value. ++ ++ type File_Attributes is ++ array (1 .. File_Attributes_Size) ++ of System.Storage_Elements.Storage_Element; ++ for File_Attributes'Alignment use Standard'Maximum_Alignment; ++ ++ Unknown_Attributes : File_Attributes; + -- A cache for various attributes for a file (length, accessibility,...) +- -- This must be initialized to Unknown_Attributes prior to the first call. ++ -- Will be initialized properly at elaboration (for efficiency later on, ++ -- avoid function calls every time we want to reset the attributes) prior ++ -- to the first usage. We cannot make it constant since the compiler may ++ -- put it in a read-only section. + + function Is_Directory + (Name : C_File_Name; +@@ -754,22 +770,4 @@ private + -- detected, the file being written is deleted, and a fatal error is + -- signalled. + +- File_Attributes_Size : constant Natural := 32; +- -- This should be big enough to fit a "struct file_attributes" on any +- -- system. It doesn't cause any malfunction if it is too big (which avoids +- -- the need for either mapping the struct exactly or importing the sizeof +- -- from C, which would result in dynamic code). However, it does waste +- -- space (e.g. when a component of this type appears in a record, if it is +- -- unnecessarily large). Note: for runtime units, use System.OS_Constants. +- -- SIZEOF_struct_file_attributes instead, which has the exact value. +- +- type File_Attributes is +- array (1 .. File_Attributes_Size) +- of System.Storage_Elements.Storage_Element; +- for File_Attributes'Alignment use Standard'Maximum_Alignment; +- +- Unknown_Attributes : constant File_Attributes := (others => 0); +- -- Will be initialized properly at elaboration (for efficiency later on, +- -- avoid function calls every time we want to reset the attributes). +- + end Osint; diff --git a/util/crossgcc/patches/gcc-9.2.0_gnat.patch b/util/crossgcc/patches/gcc-8.3.0_gnat.patch index 2d7cecee24..2d7cecee24 100644 --- a/util/crossgcc/patches/gcc-9.2.0_gnat.patch +++ b/util/crossgcc/patches/gcc-8.3.0_gnat.patch diff --git a/util/crossgcc/patches/gcc-9.2.0_libgcc.patch b/util/crossgcc/patches/gcc-8.3.0_libgcc.patch index 2f75c92b41..2f75c92b41 100644 --- a/util/crossgcc/patches/gcc-9.2.0_libgcc.patch +++ b/util/crossgcc/patches/gcc-8.3.0_libgcc.patch diff --git a/util/crossgcc/patches/gcc-8.3.0_nds32_ite.patch b/util/crossgcc/patches/gcc-8.3.0_nds32_ite.patch new file mode 100644 index 0000000000..2f0780be7f --- /dev/null +++ b/util/crossgcc/patches/gcc-8.3.0_nds32_ite.patch @@ -0,0 +1,21019 @@ +diff -urN gcc-8.2.0.orig/gcc/common/config/nds32/nds32-common.c gcc-8.2.0/gcc/common/config/nds32/nds32-common.c +--- gcc-8.2.0.orig/gcc/common/config/nds32/nds32-common.c 2018-04-06 07:51:33.000000000 +0200 ++++ gcc-8.2.0/gcc/common/config/nds32/nds32-common.c 2019-01-25 15:38:32.817242625 +0100 +@@ -53,6 +53,16 @@ + + return true; + ++ case OPT_misr_secure_: ++ /* Check the valid security level: 0 1 2 3. */ ++ if (value < 0 || value > 3) ++ { ++ error_at (loc, "for the option -misr-secure=X, the valid X " ++ "must be: 0, 1, 2, or 3"); ++ return false; ++ } ++ return true; ++ + case OPT_mcache_block_size_: + /* Check valid value: 4 8 16 32 64 128 256 512. */ + if (exact_log2 (value) < 2 || exact_log2 (value) > 9) +@@ -74,12 +84,19 @@ + /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ + static const struct default_options nds32_option_optimization_table[] = + { ++#if TARGET_LINUX_ABI == 0 ++ /* Disable -fdelete-null-pointer-checks by default in ELF toolchain. */ ++ { OPT_LEVELS_ALL, OPT_fdelete_null_pointer_checks, ++ NULL, 0 }, ++#endif + /* Enable -fsched-pressure by default at -O1 and above. */ + { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, + /* Enable -fomit-frame-pointer by default at all optimization levels. */ + { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 1 }, + /* Enable -mrelax-hint by default at all optimization levels. */ + { OPT_LEVELS_ALL, OPT_mrelax_hint, NULL, 1 }, ++ /* Enalbe -malways-align by default at -O1 and above, but not -Os or -Og. */ ++ { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align, NULL, 1 }, + /* Enable -mv3push by default at -Os, but it is useless under V2 ISA. */ + { OPT_LEVELS_SIZE, OPT_mv3push, NULL, 1 }, + +@@ -87,6 +104,19 @@ + }; + + /* ------------------------------------------------------------------------ */ ++ ++/* Implement TARGET_EXCEPT_UNWIND_INFO. */ ++static enum unwind_info_type ++nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED) ++{ ++ if (TARGET_LINUX_ABI) ++ return UI_DWARF2; ++ ++ return UI_SJLJ; ++} ++ ++/* ------------------------------------------------------------------------ */ ++ + + /* Run-time Target Specification. */ + +@@ -103,6 +133,7 @@ + TARGET_EXT_PERF : Generate performance extention instrcution. + TARGET_EXT_PERF2 : Generate performance extention version 2 instrcution. + TARGET_EXT_STRING : Generate string extention instrcution. ++ TARGET_HW_ABS : Generate hardware abs instruction. + TARGET_CMOV : Generate conditional move instruction. */ + #undef TARGET_DEFAULT_TARGET_FLAGS + #define TARGET_DEFAULT_TARGET_FLAGS \ +@@ -113,6 +144,7 @@ + | MASK_EXT_PERF \ + | MASK_EXT_PERF2 \ + | MASK_EXT_STRING \ ++ | MASK_HW_ABS \ + | MASK_CMOV) + + #undef TARGET_HANDLE_OPTION +@@ -125,7 +157,7 @@ + /* Defining the Output Assembler Language. */ + + #undef TARGET_EXCEPT_UNWIND_INFO +-#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info ++#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info + + /* ------------------------------------------------------------------------ */ + +diff -urN gcc-8.2.0.orig/gcc/config/nds32/constants.md gcc-8.2.0/gcc/config/nds32/constants.md +--- gcc-8.2.0.orig/gcc/config/nds32/constants.md 2018-04-22 09:46:39.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/constants.md 2019-01-25 15:38:32.821242637 +0100 +@@ -23,6 +23,7 @@ + (define_constants + [(R8_REGNUM 8) + (TA_REGNUM 15) ++ (TP_REGNUM 25) + (FP_REGNUM 28) + (GP_REGNUM 29) + (LP_REGNUM 30) +@@ -49,6 +50,16 @@ + UNSPEC_FFB + UNSPEC_FFMISM + UNSPEC_FLMISM ++ UNSPEC_KDMBB ++ UNSPEC_KDMBT ++ UNSPEC_KDMTB ++ UNSPEC_KDMTT ++ UNSPEC_KHMBB ++ UNSPEC_KHMBT ++ UNSPEC_KHMTB ++ UNSPEC_KHMTT ++ UNSPEC_KSLRAW ++ UNSPEC_KSLRAWU + UNSPEC_SVA + UNSPEC_SVS + UNSPEC_WSBH +@@ -62,6 +73,29 @@ + UNSPEC_UASTORE_HW + UNSPEC_UASTORE_W + UNSPEC_UASTORE_DW ++ UNSPEC_GOTINIT ++ UNSPEC_GOT ++ UNSPEC_GOTOFF ++ UNSPEC_PLT ++ UNSPEC_TLSGD ++ UNSPEC_TLSLD ++ UNSPEC_TLSIE ++ UNSPEC_TLSLE ++ UNSPEC_ROUND ++ UNSPEC_VEC_COMPARE ++ UNSPEC_KHM ++ UNSPEC_KHMX ++ UNSPEC_CLIP_OV ++ UNSPEC_CLIPS_OV ++ UNSPEC_BITREV ++ UNSPEC_KABS ++ UNSPEC_LOOP_END ++ UNSPEC_TLS_DESC ++ UNSPEC_TLS_IE ++ UNSPEC_ADD32 ++ UNSPEC_ICT ++ UNSPEC_KADDH ++ UNSPEC_KSUBH + ]) + + ;; The unspec_volatile operation index. +@@ -135,10 +169,14 @@ + UNSPEC_VOLATILE_SET_TRIG_EDGE + UNSPEC_VOLATILE_GET_TRIG_TYPE + UNSPEC_VOLATILE_RELAX_GROUP ++ UNSPEC_VOLATILE_OMIT_FP_BEGIN ++ UNSPEC_VOLATILE_OMIT_FP_END + UNSPEC_VOLATILE_POP25_RETURN + UNSPEC_VOLATILE_UNALIGNED_FEATURE + UNSPEC_VOLATILE_ENABLE_UNALIGNED + UNSPEC_VOLATILE_DISABLE_UNALIGNED ++ UNSPEC_VOLATILE_RDOV ++ UNSPEC_VOLATILE_CLROV + ]) + + ;; ------------------------------------------------------------------------ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/constraints.md gcc-8.2.0/gcc/config/nds32/constraints.md +--- gcc-8.2.0.orig/gcc/config/nds32/constraints.md 2018-04-06 07:51:33.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/constraints.md 2019-01-25 15:38:32.821242637 +0100 +@@ -127,6 +127,11 @@ + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -31, 0)"))) + ++(define_constraint "Iu06" ++ "Unsigned immediate 6-bit value" ++ (and (match_code "const_int") ++ (match_test "ival < (1 << 6) && ival >= 0"))) ++ + ;; Ip05 is special and dedicated for v3 movpi45 instruction. + ;; movpi45 has imm5u field but the range is 16 ~ 47. + (define_constraint "Ip05" +@@ -136,10 +141,10 @@ + && ival >= (0 + 16) + && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) + +-(define_constraint "Iu06" ++(define_constraint "IU06" + "Unsigned immediate 6-bit value constraint for addri36.sp instruction" + (and (match_code "const_int") +- (match_test "ival < (1 << 6) ++ (match_test "ival < (1 << 8) + && ival >= 0 + && (ival % 4 == 0) + && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) +@@ -302,6 +307,25 @@ + (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) + && (IN_RANGE (exact_log2 (ival + 1), 1, 8))"))) + ++(define_constraint "CVp5" ++ "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVp5_p (op)"))) ++ ++(define_constraint "CVs5" ++ "Signed immediate 5-bit value" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVs5_p (op)"))) ++ ++(define_constraint "CVs2" ++ "Signed immediate 20-bit value" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVs2_p (op)"))) ++ ++(define_constraint "CVhi" ++ "The immediate value that can be simply set high 20-bit" ++ (and (match_code "const_vector") ++ (match_test "nds32_valid_CVhi_p (op)"))) + + (define_memory_constraint "U33" + "Memory constraint for 333 format" +@@ -349,4 +373,9 @@ + (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && nds32_float_mem_operand_p (op)"))) + ++(define_constraint "S" ++ "@internal ++ A constant call address." ++ (match_operand 0 "nds32_symbolic_operand")) ++ + ;; ------------------------------------------------------------------------ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/elf.h gcc-8.2.0/gcc/config/nds32/elf.h +--- gcc-8.2.0.orig/gcc/config/nds32/elf.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/elf.h 2019-01-25 15:38:32.821242637 +0100 +@@ -0,0 +1,81 @@ ++/* Definitions of target machine of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2014 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#define TARGET_LINUX_ABI 0 ++ ++/* In the configure stage we may use options --enable-default-relax, ++ --enable-Os-default-ifc and --enable-Os-default-ex9. They effect ++ the default spec of passing --relax, --mifc, and --mex9 to linker. ++ We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC ++ so that we can customize them conveniently. */ ++#define LINK_SPEC \ ++ " %{G*}" \ ++ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ ++ " %{shared:-shared}" \ ++ NDS32_RELAX_SPEC ++ ++#define LIB_SPEC \ ++ " -lc -lgloss" ++ ++#define LIBGCC_SPEC \ ++ " -lgcc" ++ ++/* The option -mno-ctor-dtor can disable constructor/destructor feature ++ by applying different crt stuff. In the convention, crt0.o is the ++ startup file without constructor/destructor; ++ crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the ++ startup files with constructor/destructor. ++ Note that crt0.o, crt1.o, crti.o, and crtn.o are provided ++ by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are ++ currently provided by GCC for nds32 target. ++ ++ For nds32 target so far: ++ If -mno-ctor-dtor, we are going to link ++ "crt0.o [user objects]". ++ If -mctor-dtor, we are going to link ++ "crt1.o crtbegin1.o [user objects] crtend1.o". ++ ++ Note that the TARGET_DEFAULT_CTOR_DTOR would effect the ++ default behavior. Check gcc/config.gcc for more information. */ ++#ifdef TARGET_DEFAULT_CTOR_DTOR ++ #define STARTFILE_SPEC \ ++ " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ ++ " %{!mno-ctor-dtor:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++ #define ENDFILE_SPEC \ ++ " %{!mno-ctor-dtor:crtend1.o%s}" ++#else ++ #define STARTFILE_SPEC \ ++ " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \ ++ " %{mctor-dtor|coverage:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++ #define ENDFILE_SPEC \ ++ " %{mctor-dtor|coverage:crtend1.o%s}" ++#endif ++ ++#define STARTFILE_CXX_SPEC \ ++ " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ ++ " %{!mno-ctor-dtor:crtbegin1.o%s}" \ ++ " %{mcrt-arg:crtarg.o%s}" ++#define ENDFILE_CXX_SPEC \ ++ " %{!mno-ctor-dtor:crtend1.o%s}" +diff -urN gcc-8.2.0.orig/gcc/config/nds32/iterators.md gcc-8.2.0/gcc/config/nds32/iterators.md +--- gcc-8.2.0.orig/gcc/config/nds32/iterators.md 2018-04-06 07:51:33.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/iterators.md 2019-01-25 15:38:32.821242637 +0100 +@@ -68,6 +68,28 @@ + ;; shifts + (define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert]) + ++(define_code_iterator shifts [ashift ashiftrt lshiftrt]) ++ ++(define_code_iterator shiftrt [ashiftrt lshiftrt]) ++ ++(define_code_iterator sat_plus [ss_plus us_plus]) ++ ++(define_code_iterator all_plus [plus ss_plus us_plus]) ++ ++(define_code_iterator sat_minus [ss_minus us_minus]) ++ ++(define_code_iterator all_minus [minus ss_minus us_minus]) ++ ++(define_code_iterator plus_minus [plus minus]) ++ ++(define_code_iterator extend [sign_extend zero_extend]) ++ ++(define_code_iterator sumax [smax umax]) ++ ++(define_code_iterator sumin [smin umin]) ++ ++(define_code_iterator sumin_max [smax umax smin umin]) ++ + ;;---------------------------------------------------------------------------- + ;; Code attributes. + ;;---------------------------------------------------------------------------- +@@ -76,5 +98,23 @@ + (define_code_attr shift + [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")]) + ++(define_code_attr su ++ [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")]) ++ ++(define_code_attr zs ++ [(sign_extend "s") (zero_extend "z")]) ++ ++(define_code_attr uk ++ [(plus "") (ss_plus "k") (us_plus "uk") ++ (minus "") (ss_minus "k") (us_minus "uk")]) ++ ++(define_code_attr opcode ++ [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")]) ++ ++(define_code_attr add_rsub ++ [(plus "a") (minus "rs")]) ++ ++(define_code_attr add_sub ++ [(plus "a") (minus "s")]) + + ;;---------------------------------------------------------------------------- +diff -urN gcc-8.2.0.orig/gcc/config/nds32/linux.h gcc-8.2.0/gcc/config/nds32/linux.h +--- gcc-8.2.0.orig/gcc/config/nds32/linux.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/linux.h 2019-01-25 15:38:32.821242637 +0100 +@@ -0,0 +1,86 @@ ++/* Definitions of target machine of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2014 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ <http://www.gnu.org/licenses/>. */ ++ ++ ++/* ------------------------------------------------------------------------ */ ++ ++#define TARGET_LINUX_ABI 1 ++ ++#undef SIZE_TYPE ++#define SIZE_TYPE "unsigned int" ++ ++#undef PTRDIFF_TYPE ++#define PTRDIFF_TYPE "int" ++ ++#define TARGET_OS_CPP_BUILTINS() \ ++ do \ ++ { \ ++ GNU_USER_TARGET_OS_CPP_BUILTINS(); \ ++ } \ ++ while (0) ++ ++#ifdef TARGET_BIG_ENDIAN_DEFAULT ++#define LD_SO_ENDIAN_SPEC "%{mlittle-endian:le}%{!mlittle-endian:be}" ++#else ++#define LD_SO_ENDIAN_SPEC "%{mbig-endian:be}%{!mbig-endian:le}" ++#endif ++ ++/* Record arch version in TARGET_ARCH_DEFAULT. 0 means soft ABI, ++ 1 means hard ABI and using full floating-point instruction, ++ 2 means hard ABI and only using single-precision floating-point ++ instruction */ ++#if TARGET_ARCH_DEFAULT ++#define LD_SO_ABI_SPEC "%{!mabi=2:f}" ++#else ++#define LD_SO_ABI_SPEC "%{mabi=2fp+:f}" ++#endif ++ ++#define GLIBC_DYNAMIC_LINKER \ ++ "/lib/ld-linux-nds32" LD_SO_ENDIAN_SPEC LD_SO_ABI_SPEC ".so.1" ++ ++/* In the configure stage we may use options --enable-default-relax, ++ --enable-Os-default-ifc and --enable-Os-default-ex9. They effect ++ the default spec of passing --relax, --mifc, and --mex9 to linker. ++ We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC ++ so that we can customize them conveniently. */ ++#define LINK_SPEC \ ++ " %{G*}" \ ++ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ ++ " %{shared:-shared} \ ++ %{!shared: \ ++ %{!static: \ ++ %{rdynamic:-export-dynamic} \ ++ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ ++ %{static:-static}}" \ ++ NDS32_RELAX_SPEC ++ ++#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} " ++ ++#define CPP_SPEC "%{pthread:-D_REENTRANT}" ++ ++/* The SYNC operations are implemented as library functions, not ++ INSN patterns. As a result, the HAVE defines for the patterns are ++ not defined. We need to define them to generate the corresponding ++ __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE ++ defines. ++ Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html */ ++#define HAVE_sync_compare_and_swapqi 1 ++#define HAVE_sync_compare_and_swaphi 1 ++#define HAVE_sync_compare_and_swapsi 1 +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32.c gcc-8.2.0/gcc/config/nds32/nds32.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32.c 2018-05-07 03:38:02.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32.c 2019-01-25 15:38:32.833242671 +0100 +@@ -305,6 +305,7 @@ + { "nested", 0, 0, false, false, false, false, NULL, NULL }, + { "not_nested", 0, 0, false, false, false, false, NULL, NULL }, + { "nested_ready", 0, 0, false, false, false, false, NULL, NULL }, ++ { "critical", 0, 0, false, false, false, false, NULL, NULL }, + + /* The attributes describing isr register save scheme. */ + { "save_all", 0, 0, false, false, false, false, NULL, NULL }, +@@ -314,9 +315,19 @@ + { "nmi", 1, 1, false, false, false, false, NULL, NULL }, + { "warm", 1, 1, false, false, false, false, NULL, NULL }, + ++ /* The attributes describing isr security level. */ ++ { "secure", 1, 1, false, false, false, false, NULL, NULL }, ++ + /* The attribute telling no prologue/epilogue. */ + { "naked", 0, 0, false, false, false, false, NULL, NULL }, + ++ /* The attribute is used to tell this function to be ROM patch. */ ++ { "indirect_call",0, 0, false, false, false, false, NULL, NULL }, ++ ++ /* FOR BACKWARD COMPATIBILITY, ++ this attribute also tells no prologue/epilogue. */ ++ { "no_prologue", 0, 0, false, false, false, false, NULL, NULL }, ++ + /* The last attribute spec is set to be NULL. */ + { NULL, 0, 0, false, false, false, false, NULL, NULL } + }; +@@ -345,6 +356,10 @@ + /* Initially this function is not under strictly aligned situation. */ + machine->strict_aligned_p = 0; + ++ /* Initially this function has no naked and no_prologue attributes. */ ++ machine->attr_naked_p = 0; ++ machine->attr_no_prologue_p = 0; ++ + return machine; + } + +@@ -362,6 +377,15 @@ + needs prologue/epilogue. */ + cfun->machine->naked_p = 0; + ++ /* We need to mark whether this function has naked and no_prologue ++ attribute so that we can distinguish the difference if users applies ++ -mret-in-naked-func option. */ ++ cfun->machine->attr_naked_p ++ = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) ++ ? 1 : 0; ++ cfun->machine->attr_no_prologue_p ++ = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) ++ ? 1 : 0; + + /* If __builtin_eh_return is used, we better have frame pointer needed + so that we can easily locate the stack slot of return address. */ +@@ -432,7 +456,8 @@ + + /* If $gp value is required to be saved on stack, it needs 4 bytes space. + Check whether we are using PIC code genration. */ +- cfun->machine->gp_size = (flag_pic) ? 4 : 0; ++ cfun->machine->gp_size = ++ (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0; + + /* If $lp value is required to be saved on stack, it needs 4 bytes space. + Check whether $lp is ever live. */ +@@ -497,7 +522,7 @@ + } + + /* Check if this function can omit prologue/epilogue code fragment. +- If there is 'naked' attribute in this function, ++ If there is 'no_prologue'/'naked'/'secure' attribute in this function, + we can set 'naked_p' flag to indicate that + we do not have to generate prologue/epilogue. + Or, if all the following conditions succeed, +@@ -510,14 +535,17 @@ + is no outgoing size. + condition 3: There is no local_size, which means + we do not need to adjust $sp. */ +- if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) ++ if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) ++ || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) ++ || lookup_attribute ("secure", DECL_ATTRIBUTES (current_function_decl)) + || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM + && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM + && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM + && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM + && !df_regs_ever_live_p (FP_REGNUM) + && !df_regs_ever_live_p (LP_REGNUM) +- && cfun->machine->local_size == 0)) ++ && cfun->machine->local_size == 0 ++ && !flag_pic)) + { + /* Set this function 'naked_p' and other functions can check this flag. + Note that in nds32 port, the 'naked_p = 1' JUST means there is no +@@ -1259,6 +1287,32 @@ + REG_NOTES (parallel_insn) = dwarf; + } + ++static void ++nds32_emit_load_gp (void) ++{ ++ rtx got_symbol, pat; ++ ++ /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ ++ emit_insn (gen_blockage ()); ++ ++ got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ /* sethi $gp, _GLOBAL_OFFSET_TABLE_ -8 */ ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); ++ pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-8))); ++ emit_insn (gen_sethi (pic_offset_table_rtx,pat)); ++ ++ /* ori $gp, $gp, _GLOBAL_OFFSET_TABLE_ -4 */ ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); ++ pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-4))); ++ emit_insn (gen_lo_sum (pic_offset_table_rtx, pic_offset_table_rtx, pat)); ++ ++ /* add5.pc $gp */ ++ emit_insn (gen_add_pc (pic_offset_table_rtx, pic_offset_table_rtx)); ++ ++ /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ ++ emit_insn (gen_blockage ()); ++} ++ + /* Function that may creates more instructions + for large value on adjusting stack pointer. + +@@ -1342,17 +1396,25 @@ + } + + /* Return true if FUNC is a naked function. */ +-static bool ++bool + nds32_naked_function_p (tree func) + { +- tree t; ++ /* FOR BACKWARD COMPATIBILITY, ++ we need to support 'no_prologue' attribute as well. */ ++ tree t_naked; ++ tree t_no_prologue; + + if (TREE_CODE (func) != FUNCTION_DECL) + abort (); + +- t = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); ++ /* We have to use lookup_attribute() to check attributes. ++ Because attr_naked_p and attr_no_prologue_p are set in ++ nds32_compute_stack_frame() and the function has not been ++ invoked yet. */ ++ t_naked = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); ++ t_no_prologue = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (func)); + +- return (t != NULL_TREE); ++ return ((t_naked != NULL_TREE) || (t_no_prologue != NULL_TREE)); + } + + /* Function that determine whether a load postincrement is a good thing to use +@@ -1570,6 +1632,11 @@ + nds32_register_passes (void) + { + nds32_register_pass ( ++ make_pass_nds32_fp_as_gp, ++ PASS_POS_INSERT_BEFORE, ++ "ira"); ++ ++ nds32_register_pass ( + make_pass_nds32_relax_opt, + PASS_POS_INSERT_AFTER, + "mach"); +@@ -1636,6 +1703,9 @@ + { + int regno; + ++ if (TARGET_LINUX_ABI) ++ fixed_regs[TP_REGNUM] = 1; ++ + if (TARGET_HARD_FLOAT) + { + for (regno = NDS32_FIRST_FPR_REGNUM; +@@ -1987,6 +2057,16 @@ + : PARM_BOUNDARY); + } + ++bool ++nds32_vector_mode_supported_p (machine_mode mode) ++{ ++ if (mode == V4QImode ++ || mode == V2HImode) ++ return NDS32_EXT_DSP_P (); ++ ++ return false; ++} ++ + /* -- How Scalar Function Values Are Returned. */ + + static rtx +@@ -2124,56 +2204,12 @@ + nds32_asm_function_end_prologue (FILE *file) + { + fprintf (file, "\t! END PROLOGUE\n"); +- +- /* If frame pointer is NOT needed and -mfp-as-gp is issued, +- we can generate special directive: ".omit_fp_begin" +- to guide linker doing fp-as-gp optimization. +- However, for a naked function, which means +- it should not have prologue/epilogue, +- using fp-as-gp still requires saving $fp by push/pop behavior and +- there is no benefit to use fp-as-gp on such small function. +- So we need to make sure this function is NOT naked as well. */ +- if (!frame_pointer_needed +- && !cfun->machine->naked_p +- && cfun->machine->fp_as_gp_p) +- { +- fprintf (file, "\t! ----------------------------------------\n"); +- fprintf (file, "\t! Guide linker to do " +- "link time optimization: fp-as-gp\n"); +- fprintf (file, "\t! We add one more instruction to " +- "initialize $fp near to $gp location.\n"); +- fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n"); +- fprintf (file, "\t! this extra instruction should be " +- "eliminated at link stage.\n"); +- fprintf (file, "\t.omit_fp_begin\n"); +- fprintf (file, "\tla\t$fp,_FP_BASE_\n"); +- fprintf (file, "\t! ----------------------------------------\n"); +- } + } + + /* Before rtl epilogue has been expanded, this function is used. */ + static void + nds32_asm_function_begin_epilogue (FILE *file) + { +- /* If frame pointer is NOT needed and -mfp-as-gp is issued, +- we can generate special directive: ".omit_fp_end" +- to claim fp-as-gp optimization range. +- However, for a naked function, +- which means it should not have prologue/epilogue, +- using fp-as-gp still requires saving $fp by push/pop behavior and +- there is no benefit to use fp-as-gp on such small function. +- So we need to make sure this function is NOT naked as well. */ +- if (!frame_pointer_needed +- && !cfun->machine->naked_p +- && cfun->machine->fp_as_gp_p) +- { +- fprintf (file, "\t! ----------------------------------------\n"); +- fprintf (file, "\t! Claim the range of fp-as-gp " +- "link time optimization\n"); +- fprintf (file, "\t.omit_fp_end\n"); +- fprintf (file, "\t! ----------------------------------------\n"); +- } +- + fprintf (file, "\t! BEGIN EPILOGUE\n"); + } + +@@ -2200,6 +2236,26 @@ + ? 1 + : 0); + ++ if (flag_pic) ++ { ++ fprintf (file, "\tsmw.adm\t$r31, [$r31], $r31, 4\n"); ++ fprintf (file, "\tsethi\t%s, hi20(_GLOBAL_OFFSET_TABLE_-8)\n", ++ reg_names [PIC_OFFSET_TABLE_REGNUM]); ++ fprintf (file, "\tori\t%s, %s, lo12(_GLOBAL_OFFSET_TABLE_-4)\n", ++ reg_names [PIC_OFFSET_TABLE_REGNUM], ++ reg_names [PIC_OFFSET_TABLE_REGNUM]); ++ ++ if (TARGET_ISA_V3) ++ fprintf (file, "\tadd5.pc\t$gp\n"); ++ else ++ { ++ fprintf (file, "\tmfusr\t$ta, $pc\n"); ++ fprintf (file, "\tadd\t%s, $ta, %s\n", ++ reg_names [PIC_OFFSET_TABLE_REGNUM], ++ reg_names [PIC_OFFSET_TABLE_REGNUM]); ++ } ++ } ++ + if (delta != 0) + { + if (satisfies_constraint_Is15 (GEN_INT (delta))) +@@ -2224,9 +2280,23 @@ + } + } + +- fprintf (file, "\tb\t"); +- assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); +- fprintf (file, "\n"); ++ if (flag_pic) ++ { ++ fprintf (file, "\tla\t$ta, "); ++ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); ++ fprintf (file, "@PLT\n"); ++ fprintf (file, "\t! epilogue\n"); ++ fprintf (file, "\tlwi.bi\t%s, [%s], 4\n", ++ reg_names[PIC_OFFSET_TABLE_REGNUM], ++ reg_names[STACK_POINTER_REGNUM]); ++ fprintf (file, "\tbr\t$ta\n"); ++ } ++ else ++ { ++ fprintf (file, "\tb\t"); ++ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); ++ fprintf (file, "\n"); ++ } + + final_end_function (); + } +@@ -2242,15 +2312,20 @@ + + /* 1. Do not apply sibling call if -mv3push is enabled, + because pop25 instruction also represents return behavior. +- 2. If this function is a variadic function, do not apply sibling call ++ 2. If this function is a isr function, do not apply sibling call ++ because it may perform the behavior that user does not expect. ++ 3. If this function is a variadic function, do not apply sibling call + because the stack layout may be a mess. +- 3. We don't want to apply sibling call optimization for indirect ++ 4. We don't want to apply sibling call optimization for indirect + sibcall because the pop behavior in epilogue may pollute the + content of caller-saved regsiter when the register is used for +- indirect sibcall. */ ++ indirect sibcall. ++ 5. In pic mode, it may use some registers for PLT call. */ + return (!TARGET_V3PUSH ++ && !nds32_isr_function_p (current_function_decl) + && (cfun->machine->va_args_size == 0) +- && decl); ++ && decl ++ && !flag_pic); + } + + /* Determine whether we need to enable warning for function return check. */ +@@ -2566,6 +2641,13 @@ + + case SYMBOL_REF: + /* (mem (symbol_ref A)) => [symbol_ref] */ ++ ++ if (flag_pic || SYMBOL_REF_TLS_MODEL (x)) ++ return false; ++ ++ if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x)) ++ return false; ++ + /* If -mcmodel=large, the 'symbol_ref' is not a valid address + during or after LRA/reload phase. */ + if (TARGET_CMODEL_LARGE +@@ -2577,7 +2659,8 @@ + the 'symbol_ref' is not a valid address during or after + LRA/reload phase. */ + if (TARGET_CMODEL_MEDIUM +- && NDS32_SYMBOL_REF_RODATA_P (x) ++ && (NDS32_SYMBOL_REF_RODATA_P (x) ++ || CONSTANT_POOL_ADDRESS_P (x)) + && (reload_completed + || reload_in_progress + || lra_in_progress)) +@@ -2599,6 +2682,10 @@ + { + /* Now we see the [ + const_addr ] pattern, but we need + some further checking. */ ++ ++ if (flag_pic || SYMBOL_REF_TLS_MODEL (op0)) ++ return false; ++ + /* If -mcmodel=large, the 'const_addr' is not a valid address + during or after LRA/reload phase. */ + if (TARGET_CMODEL_LARGE +@@ -2675,17 +2762,202 @@ + + case LO_SUM: + /* (mem (lo_sum (reg) (symbol_ref))) */ +- /* (mem (lo_sum (reg) (const))) */ +- gcc_assert (REG_P (XEXP (x, 0))); +- if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF +- || GET_CODE (XEXP (x, 1)) == CONST) +- return nds32_legitimate_address_p (mode, XEXP (x, 1), strict); +- else ++ /* (mem (lo_sum (reg) (const (plus (symbol_ref) (reg)))) */ ++ /* TLS case: (mem (lo_sum (reg) (const (unspec symbol_ref X)))) */ ++ /* The LO_SUM is a valid address if and only if we would like to ++ generate 32-bit full address memory access with any of following ++ circumstance: ++ 1. -mcmodel=large. ++ 2. -mcmodel=medium and the symbol_ref references to rodata. */ ++ { ++ rtx sym = NULL_RTX; ++ ++ if (flag_pic) ++ return false; ++ ++ if (!REG_P (XEXP (x, 0))) ++ return false; ++ ++ if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF) ++ sym = XEXP (x, 1); ++ else if (GET_CODE (XEXP (x, 1)) == CONST) ++ { ++ rtx plus = XEXP(XEXP (x, 1), 0); ++ if (GET_CODE (plus) == PLUS) ++ sym = XEXP (plus, 0); ++ else if (GET_CODE (plus) == UNSPEC) ++ sym = XVECEXP (plus, 0, 0); ++ } ++ else ++ return false; ++ ++ gcc_assert (GET_CODE (sym) == SYMBOL_REF); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ return true; ++ ++ if (TARGET_CMODEL_LARGE) ++ return true; ++ else if (TARGET_CMODEL_MEDIUM ++ && NDS32_SYMBOL_REF_RODATA_P (sym)) ++ return true; ++ else ++ return false; ++ } ++ ++ default: ++ return false; ++ } ++} ++ ++static rtx ++nds32_legitimize_address (rtx x, ++ rtx oldx ATTRIBUTE_UNUSED, ++ machine_mode mode ATTRIBUTE_UNUSED) ++{ ++ if (nds32_tls_referenced_p (x)) ++ x = nds32_legitimize_tls_address (x); ++ else if (flag_pic && SYMBOLIC_CONST_P (x)) ++ x = nds32_legitimize_pic_address (x); ++ else if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x)) ++ x = nds32_legitimize_ict_address (x); ++ ++ return x; ++} ++ ++static bool ++nds32_legitimate_constant_p (machine_mode mode, rtx x) ++{ ++ switch (GET_CODE (x)) ++ { ++ case CONST_DOUBLE: ++ if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ && (mode == DFmode || mode == SFmode)) + return false; ++ break; ++ case CONST: ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == PLUS) ++ { ++ if (!CONST_INT_P (XEXP (x, 1))) ++ return false; ++ x = XEXP (x, 0); ++ } ++ ++ if (GET_CODE (x) == UNSPEC) ++ { ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLT: ++ case UNSPEC_TLSGD: ++ case UNSPEC_TLSLD: ++ case UNSPEC_TLSIE: ++ case UNSPEC_TLSLE: ++ case UNSPEC_ICT: ++ return false; ++ default: ++ return true; ++ } ++ } ++ break; ++ case SYMBOL_REF: ++ /* TLS symbols need a call to resolve in ++ precompute_register_parameters. */ ++ if (SYMBOL_REF_TLS_MODEL (x)) ++ return false; ++ break; ++ default: ++ return true; ++ } ++ ++ return true; ++} ++ ++/* Reorgnize the UNSPEC CONST and return its direct symbol. */ ++static rtx ++nds32_delegitimize_address (rtx x) ++{ ++ x = delegitimize_mem_from_attrs (x); ++ ++ if (GET_CODE(x) == CONST) ++ { ++ rtx inner = XEXP (x, 0); ++ ++ /* Handle for GOTOFF. */ ++ if (GET_CODE (inner) == PLUS) ++ inner = XEXP (inner, 0); ++ ++ if (GET_CODE (inner) == UNSPEC) ++ { ++ switch (XINT (inner, 1)) ++ { ++ case UNSPEC_GOTINIT: ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLT: ++ case UNSPEC_TLSGD: ++ case UNSPEC_TLSLD: ++ case UNSPEC_TLSIE: ++ case UNSPEC_TLSLE: ++ case UNSPEC_ICT: ++ x = XVECEXP (inner, 0, 0); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ return x; ++} + ++static machine_mode ++nds32_vectorize_preferred_simd_mode (scalar_mode mode) ++{ ++ if (!NDS32_EXT_DSP_P ()) ++ return word_mode; ++ ++ switch (mode) ++ { ++ case E_QImode: ++ return V4QImode; ++ case E_HImode: ++ return V2HImode; ++ default: ++ return word_mode; ++ } ++} ++ ++static bool ++nds32_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ switch (GET_CODE (x)) ++ { ++ case CONST: ++ return !nds32_legitimate_constant_p (mode, x); ++ case SYMBOL_REF: ++ /* All symbols have to be accessed through gp-relative in PIC mode. */ ++ /* We don't want to force symbol as constant pool in .text section, ++ because we use the gp-relatived instruction to load in small ++ or medium model. */ ++ if (flag_pic ++ || SYMBOL_REF_TLS_MODEL (x) ++ || TARGET_CMODEL_SMALL ++ || TARGET_CMODEL_MEDIUM) ++ return true; ++ break; ++ case CONST_INT: ++ case CONST_DOUBLE: ++ if (flag_pic && (lra_in_progress || reload_completed)) ++ return true; ++ break; + default: + return false; + } ++ return false; + } + + +@@ -2731,13 +3003,33 @@ + /* Describing Relative Costs of Operations. */ + + static int +-nds32_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, ++nds32_register_move_cost (machine_mode mode, + reg_class_t from, + reg_class_t to) + { ++ /* In garywolf cpu, FPR to GPR is chaper than other cpu. */ ++ if (TARGET_PIPELINE_GRAYWOLF) ++ { ++ if (GET_MODE_SIZE (mode) == 8) ++ { ++ /* DPR to GPR. */ ++ if (from == FP_REGS && to != FP_REGS) ++ return 3; ++ /* GPR to DPR. */ ++ if (from != FP_REGS && to == FP_REGS) ++ return 2; ++ } ++ else ++ { ++ if ((from == FP_REGS && to != FP_REGS) ++ || (from != FP_REGS && to == FP_REGS)) ++ return 2; ++ } ++ } ++ + if ((from == FP_REGS && to != FP_REGS) + || (from != FP_REGS && to == FP_REGS)) +- return 9; ++ return 3; + else if (from == HIGH_REGS || to == HIGH_REGS) + return optimize_size ? 6 : 2; + else +@@ -2825,6 +3117,9 @@ + { + default_file_start (); + ++ if (flag_pic) ++ fprintf (asm_out_file, "\t.pic\n"); ++ + /* Tell assembler which ABI we are using. */ + fprintf (asm_out_file, "\t! ABI version\n"); + if (TARGET_HARD_FLOAT) +@@ -2835,10 +3130,36 @@ + /* Tell assembler that this asm code is generated by compiler. */ + fprintf (asm_out_file, "\t! This asm file is generated by compiler\n"); + fprintf (asm_out_file, "\t.flag\tverbatim\n"); +- /* Give assembler the size of each vector for interrupt handler. */ +- fprintf (asm_out_file, "\t! This vector size directive is required " +- "for checking inconsistency on interrupt handler\n"); +- fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); ++ ++ /* Insert directive for linker to distinguish object's ict flag. */ ++ if (!TARGET_LINUX_ABI) ++ { ++ if (TARGET_ICT_MODEL_LARGE) ++ fprintf (asm_out_file, "\t.ict_model\tlarge\n"); ++ else ++ fprintf (asm_out_file, "\t.ict_model\tsmall\n"); ++ } ++ ++ /* We need to provide the size of each vector for interrupt handler ++ under elf toolchain. */ ++ if (!TARGET_LINUX_ABI) ++ { ++ fprintf (asm_out_file, "\t! This vector size directive is required " ++ "for checking inconsistency on interrupt handler\n"); ++ fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); ++ } ++ ++ /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os, ++ the compiler may produce 'la $fp,_FP_BASE_' instruction ++ at prologue for fp-as-gp optimization. ++ We should emit weak reference of _FP_BASE_ to avoid undefined reference ++ in case user does not pass '--relax' option to linker. */ ++ if (!TARGET_LINUX_ABI && (TARGET_FORCE_FP_AS_GP || optimize_size)) ++ { ++ fprintf (asm_out_file, "\t! This weak reference is required to do " ++ "fp-as-gp link time optimization\n"); ++ fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n"); ++ } + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + +@@ -2849,6 +3170,49 @@ + if (TARGET_ISA_V3M) + fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3M"); + ++ switch (nds32_cpu_option) ++ { ++ case CPU_N6: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N6"); ++ break; ++ ++ case CPU_N7: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N7"); ++ break; ++ ++ case CPU_N8: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N8"); ++ break; ++ ++ case CPU_E8: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "E8"); ++ break; ++ ++ case CPU_N9: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N9"); ++ break; ++ ++ case CPU_N10: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N10"); ++ break; ++ ++ case CPU_GRAYWOLF: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Graywolf"); ++ break; ++ ++ case CPU_N12: ++ case CPU_N13: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N13"); ++ break; ++ ++ case CPU_SIMPLE: ++ fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "SIMPLE"); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ + if (TARGET_CMODEL_SMALL) + fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "SMALL"); + if (TARGET_CMODEL_MEDIUM) +@@ -2926,9 +3290,65 @@ + { + nds32_asm_file_end_for_isr (); + ++ /* The NDS32 Linux stack is mapped non-executable by default, so add a ++ .note.GNU-stack section. */ ++ if (TARGET_LINUX_ABI) ++ file_end_indicate_exec_stack (); ++ + fprintf (asm_out_file, "\t! ------------------------------------\n"); + } + ++static bool ++nds32_asm_output_addr_const_extra (FILE *file, rtx x) ++{ ++ if (GET_CODE (x) == UNSPEC) ++ { ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOTINIT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ break; ++ case UNSPEC_GOTOFF: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@GOTOFF", file); ++ break; ++ case UNSPEC_GOT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@GOT", file); ++ break; ++ case UNSPEC_PLT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@PLT", file); ++ break; ++ case UNSPEC_TLSGD: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@TLSDESC", file); ++ break; ++ case UNSPEC_TLSLD: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@TLSDESC", file); ++ break; ++ case UNSPEC_TLSIE: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@GOTTPOFF", file); ++ break; ++ case UNSPEC_TLSLE: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@TPOFF", file); ++ break; ++ case UNSPEC_ICT: ++ output_addr_const (file, XVECEXP (x, 0, 0)); ++ fputs ("@ICT", file); ++ break; ++ default: ++ return false; ++ } ++ return true; ++ } ++ else ++ return false; ++} ++ + /* -- Output and Generation of Labels. */ + + static void +@@ -2978,6 +3398,18 @@ + + /* No need to handle following process, so return immediately. */ + return; ++ ++ case 'v': ++ gcc_assert (CONST_INT_P (x) ++ && (INTVAL (x) == 0 ++ || INTVAL (x) == 8 ++ || INTVAL (x) == 16 ++ || INTVAL (x) == 24)); ++ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); ++ ++ /* No need to handle following process, so return immediately. */ ++ return; ++ + case 'B': + /* Use exact_log2() to search the 1-bit position. */ + gcc_assert (CONST_INT_P (x)); +@@ -3084,8 +3516,15 @@ + switch (GET_CODE (x)) + { + case LABEL_REF: ++ output_addr_const (stream, x); ++ break; ++ + case SYMBOL_REF: + output_addr_const (stream, x); ++ ++ if (!TARGET_LINUX_ABI && nds32_indirect_call_referenced_p (x)) ++ fprintf (stream, "@ICT"); ++ + break; + + case REG: +@@ -3168,6 +3607,17 @@ + output_addr_const (stream, x); + break; + ++ case CONST_VECTOR: ++ fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x)); ++ break; ++ ++ case LO_SUM: ++ /* This is a special case for inline assembly using memory address 'p'. ++ The inline assembly code is expected to use pesudo instruction ++ for the operand. EX: la */ ++ output_addr_const (stream, XEXP(x, 1)); ++ break; ++ + default: + /* Generally, output_addr_const () is able to handle most cases. + We want to see what CODE could appear, +@@ -3179,7 +3629,9 @@ + } + + static void +-nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) ++nds32_print_operand_address (FILE *stream, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ rtx x) + { + rtx op0, op1; + +@@ -3194,6 +3646,16 @@ + fputs ("]", stream); + break; + ++ case LO_SUM: ++ /* This is a special case for inline assembly using memory operand 'm'. ++ The inline assembly code is expected to use pesudo instruction ++ for the operand. EX: [ls].[bhw] */ ++ fputs ("[ + ", stream); ++ op1 = XEXP (x, 1); ++ output_addr_const (stream, op1); ++ fputs ("]", stream); ++ break; ++ + case REG: + /* Forbid using static chain register ($r16) + on reduced-set registers configuration. */ +@@ -3260,6 +3722,20 @@ + reg_names[REGNO (XEXP (op0, 0))], + sv); + } ++ else if (GET_CODE (op0) == ASHIFT && REG_P (op1)) ++ { ++ /* [Ra + Rb << sv] ++ In normal, ASHIFT can be converted to MULT like above case. ++ But when the address rtx does not go through canonicalize_address ++ defined in fwprop, we'll need this case. */ ++ int sv = INTVAL (XEXP (op0, 1)); ++ gcc_assert (sv <= 3 && sv >=0); ++ ++ fprintf (stream, "[%s + %s << %d]", ++ reg_names[REGNO (op1)], ++ reg_names[REGNO (XEXP (op0, 0))], ++ sv); ++ } + else + { + /* The control flow is not supposed to be here. */ +@@ -3454,6 +3930,27 @@ + static void + nds32_insert_attributes (tree decl, tree *attributes) + { ++ /* A "indirect_call" function attribute implies "noinline" and "noclone" ++ for elf toolchain to support ROM patch mechanism. */ ++ if (TREE_CODE (decl) == FUNCTION_DECL ++ && lookup_attribute ("indirect_call", *attributes) != NULL) ++ { ++ tree new_attrs = *attributes; ++ ++ if (TARGET_LINUX_ABI) ++ error("cannot use indirect_call attribute under linux toolchain"); ++ ++ if (lookup_attribute ("noinline", new_attrs) == NULL) ++ new_attrs = tree_cons (get_identifier ("noinline"), NULL, new_attrs); ++ if (lookup_attribute ("noclone", new_attrs) == NULL) ++ new_attrs = tree_cons (get_identifier ("noclone"), NULL, new_attrs); ++ ++ if (!TREE_PUBLIC (decl)) ++ error("indirect_call attribute can't apply for static function"); ++ ++ *attributes = new_attrs; ++ } ++ + /* For function declaration, we need to check isr-specific attributes: + 1. Call nds32_check_isr_attrs_conflict() to check any conflict. + 2. Check valid integer value for interrupt/exception. +@@ -3479,6 +3976,38 @@ + excp = lookup_attribute ("exception", func_attrs); + reset = lookup_attribute ("reset", func_attrs); + ++ /* The following code may use attribute arguments. If there is no ++ argument from source code, it will cause segmentation fault. ++ Therefore, return dircetly and report error message later. */ ++ if ((intr && TREE_VALUE (intr) == NULL) ++ || (excp && TREE_VALUE (excp) == NULL) ++ || (reset && TREE_VALUE (reset) == NULL)) ++ return; ++ ++ /* ------------------------------------------------------------- */ ++ /* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to support following patterns: ++ ++ __attribute__((interrupt("XXX;YYY;id=ZZZ"))) ++ __attribute__((exception("XXX;YYY;id=ZZZ"))) ++ __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) ++ ++ If interrupt/exception/reset appears and its argument is a ++ STRING_CST, we will use other functions to parse string in the ++ nds32_construct_isr_vectors_information() and then set necessary ++ isr information in the nds32_isr_vectors[] array. Here we can ++ just return immediately to avoid new-syntax checking. */ ++ if (intr != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) ++ return; ++ if (excp != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) ++ return; ++ if (reset != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) ++ return; ++ /* ------------------------------------------------------------- */ ++ + if (intr || excp) + { + /* Deal with interrupt/exception. */ +@@ -3598,7 +4127,9 @@ + } + if (TARGET_ISA_V3) + { +- /* Under V3 ISA, currently nothing should be strictly set. */ ++ /* If this is ARCH_V3J, we need to enable TARGET_REDUCED_REGS. */ ++ if (nds32_arch_option == ARCH_V3J) ++ target_flags |= MASK_REDUCED_REGS; + } + if (TARGET_ISA_V3M) + { +@@ -3610,6 +4141,9 @@ + target_flags &= ~MASK_EXT_PERF2; + /* Under V3M ISA, we need to strictly disable TARGET_EXT_STRING. */ + target_flags &= ~MASK_EXT_STRING; ++ ++ if (flag_pic) ++ error ("not support -fpic option for v3m toolchain"); + } + + /* See if we are using reduced-set registers: +@@ -3627,6 +4161,12 @@ + fixed_regs[r] = call_used_regs[r] = 1; + } + ++ /* See if user explicitly would like to use fp-as-gp optimization. ++ If so, we must prevent $fp from being allocated ++ during register allocation. */ ++ if (TARGET_FORCE_FP_AS_GP) ++ fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1; ++ + if (!TARGET_16_BIT) + { + /* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH. */ +@@ -3643,9 +4183,7 @@ + "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'"); + } + +- /* Currently, we don't support PIC code generation yet. */ +- if (flag_pic) +- sorry ("position-independent code not supported"); ++ nds32_init_rtx_costs (); + + nds32_register_passes (); + } +@@ -3659,8 +4197,11 @@ + vec<const char *> &constraints ATTRIBUTE_UNUSED, + vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) + { +- clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM)); +- SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM); ++ if (!flag_inline_asm_r15) ++ { ++ clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM)); ++ SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM); ++ } + return NULL; + } + +@@ -3687,6 +4228,13 @@ + return nds32_expand_builtin_impl (exp, target, subtarget, mode, ignore); + } + ++/* Implement TARGET_INIT_LIBFUNCS. */ ++static void ++nds32_init_libfuncs (void) ++{ ++ if (TARGET_LINUX_ABI) ++ init_sync_libfuncs (UNITS_PER_WORD); ++} + + /* ------------------------------------------------------------------------ */ + +@@ -3703,6 +4251,16 @@ + builtin_define ("__nds32__"); + builtin_define ("__NDS32__"); + ++ /* We need to provide builtin macro to describe the size of ++ each vector for interrupt handler under elf toolchain. */ ++ if (!TARGET_LINUX_ABI) ++ { ++ if (TARGET_ISR_VECTOR_SIZE_4_BYTE) ++ builtin_define ("__NDS32_ISR_VECTOR_SIZE_4__"); ++ else ++ builtin_define ("__NDS32_ISR_VECTOR_SIZE_16__"); ++ } ++ + if (TARGET_HARD_FLOAT) + builtin_define ("__NDS32_ABI_2FP_PLUS__"); + else +@@ -3770,6 +4328,8 @@ + builtin_define ("__NDS32_GP_DIRECT__"); + if (TARGET_VH) + builtin_define ("__NDS32_VH__"); ++ if (NDS32_EXT_DSP_P ()) ++ builtin_define ("__NDS32_EXT_DSP__"); + + if (TARGET_BIG_ENDIAN) + builtin_define ("__big_endian__"); +@@ -4042,6 +4602,10 @@ + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + ++ /* Check frame_pointer_needed again to prevent fp is need after reload. */ ++ if (frame_pointer_needed) ++ cfun->machine->fp_as_gp_p = false; ++ + /* If this is a variadic function, first we need to push argument + registers that hold the unnamed argument value. */ + if (cfun->machine->va_args_size != 0) +@@ -4066,7 +4630,7 @@ + + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ +- if (cfun->machine->naked_p) ++ if (cfun->machine->naked_p && !flag_pic) + return; + + /* Get callee_first_regno and callee_last_regno. */ +@@ -4195,9 +4759,15 @@ + -1 * sp_adjust); + } + +- /* Prevent the instruction scheduler from +- moving instructions across the boundary. */ +- emit_insn (gen_blockage ()); ++ /* Emit gp setup instructions for -fpic. */ ++ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ++ nds32_emit_load_gp (); ++ ++ /* If user applies -mno-sched-prolog-epilog option, ++ we need to prevent instructions of function body from being ++ scheduled with stack adjustment in prologue. */ ++ if (!flag_sched_prolog_epilog) ++ emit_insn (gen_blockage ()); + } + + /* Function for normal multiple pop epilogue. */ +@@ -4211,9 +4781,11 @@ + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + +- /* Prevent the instruction scheduler from +- moving instructions across the boundary. */ +- emit_insn (gen_blockage ()); ++ /* If user applies -mno-sched-prolog-epilog option, ++ we need to prevent instructions of function body from being ++ scheduled with stack adjustment in epilogue. */ ++ if (!flag_sched_prolog_epilog) ++ emit_insn (gen_blockage ()); + + /* If the function is 'naked', we do not have to generate + epilogue code fragment BUT 'ret' instruction. +@@ -4239,7 +4811,16 @@ + /* Generate return instruction by using 'return_internal' pattern. + Make sure this instruction is after gen_blockage(). */ + if (!sibcall_p) +- emit_jump_insn (gen_return_internal ()); ++ { ++ /* We need to further check attributes to determine whether ++ there should be return instruction at epilogue. ++ If the attribute naked exists but -mno-ret-in-naked-func ++ is issued, there is NO need to generate return instruction. */ ++ if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) ++ return; ++ ++ emit_jump_insn (gen_return_internal ()); ++ } + return; + } + +@@ -4436,9 +5017,13 @@ + if (cfun->machine->callee_saved_gpr_regs_size > 0) + df_set_regs_ever_live (FP_REGNUM, 1); + ++ /* Check frame_pointer_needed again to prevent fp is need after reload. */ ++ if (frame_pointer_needed) ++ cfun->machine->fp_as_gp_p = false; ++ + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ +- if (cfun->machine->naked_p) ++ if (cfun->machine->naked_p && !flag_pic) + return; + + /* Get callee_first_regno and callee_last_regno. */ +@@ -4566,6 +5151,10 @@ + -1 * sp_adjust); + } + ++ /* Emit gp setup instructions for -fpic. */ ++ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ++ nds32_emit_load_gp (); ++ + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); +@@ -4591,9 +5180,19 @@ + if (cfun->machine->naked_p) + { + /* Generate return instruction by using 'return_internal' pattern. +- Make sure this instruction is after gen_blockage(). */ ++ Make sure this instruction is after gen_blockage(). ++ First we need to check this is a function without sibling call. */ + if (!sibcall_p) +- emit_jump_insn (gen_return_internal ()); ++ { ++ /* We need to further check attributes to determine whether ++ there should be return instruction at epilogue. ++ If the attribute naked exists but -mno-ret-in-naked-func ++ is issued, there is NO need to generate return instruction. */ ++ if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) ++ return; ++ ++ emit_jump_insn (gen_return_internal ()); ++ } + return; + } + +@@ -4757,6 +5356,11 @@ + if (!reload_completed) + return 0; + ++ /* If attribute 'naked' appears but -mno-ret-in-naked-func is used, ++ we cannot use return instruction. */ ++ if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) ++ return 0; ++ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes +@@ -5010,6 +5614,9 @@ + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary + ++#undef TARGET_VECTOR_MODE_SUPPORTED_P ++#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p ++ + /* -- How Scalar Function Values Are Returned. */ + + #undef TARGET_FUNCTION_VALUE +@@ -5087,6 +5694,21 @@ + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p + ++#undef TARGET_LEGITIMIZE_ADDRESS ++#define TARGET_LEGITIMIZE_ADDRESS nds32_legitimize_address ++ ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P nds32_legitimate_constant_p ++ ++#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE ++#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode ++ ++#undef TARGET_CANNOT_FORCE_CONST_MEM ++#define TARGET_CANNOT_FORCE_CONST_MEM nds32_cannot_force_const_mem ++ ++#undef TARGET_DELEGITIMIZE_ADDRESS ++#define TARGET_DELEGITIMIZE_ADDRESS nds32_delegitimize_address ++ + + /* Anchored Addresses. */ + +@@ -5147,6 +5769,9 @@ + #undef TARGET_ASM_ALIGNED_SI_OP + #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + ++#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA ++#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nds32_asm_output_addr_const_extra ++ + /* -- Output of Uninitialized Variables. */ + + /* -- Output and Generation of Labels. */ +@@ -5216,6 +5841,9 @@ + + /* Emulating TLS. */ + ++#undef TARGET_HAVE_TLS ++#define TARGET_HAVE_TLS TARGET_LINUX_ABI ++ + + /* Defining coprocessor specifics for MIPS targets. */ + +@@ -5243,6 +5871,8 @@ + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN nds32_expand_builtin + ++#undef TARGET_INIT_LIBFUNCS ++#define TARGET_INIT_LIBFUNCS nds32_init_libfuncs + + #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P + #define TARGET_USE_BLOCKS_FOR_CONSTANT_P nds32_use_blocks_for_constant_p +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-cost.c gcc-8.2.0/gcc/config/nds32/nds32-cost.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-cost.c 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-cost.c 2019-01-25 15:38:32.821242637 +0100 +@@ -34,66 +34,379 @@ + #include "optabs.h" /* For GEN_FCN. */ + #include "recog.h" + #include "tm-constrs.h" ++#include "tree-pass.h" + + /* ------------------------------------------------------------------------ */ + +-bool +-nds32_rtx_costs_impl (rtx x, +- machine_mode mode ATTRIBUTE_UNUSED, +- int outer_code, +- int opno ATTRIBUTE_UNUSED, +- int *total, +- bool speed) +-{ +- int code = GET_CODE (x); ++typedef bool (*rtx_cost_func) (rtx, int, int, int, int*); + +- /* According to 'speed', goto suitable cost model section. */ +- if (speed) +- goto performance_cost; +- else +- goto size_cost; +- +- +-performance_cost: +- /* This is section for performance cost model. */ ++struct rtx_cost_model_t { ++ rtx_cost_func speed_prefer; ++ rtx_cost_func size_prefer; ++}; ++ ++static rtx_cost_model_t rtx_cost_model; ++ ++static int insn_size_16bit; /* Initial at nds32_init_rtx_costs. */ ++static const int insn_size_32bit = 4; ++ ++static bool ++nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED, ++ int code, ++ int outer_code ATTRIBUTE_UNUSED, ++ int opno ATTRIBUTE_UNUSED, ++ int *total) ++{ ++ rtx op0; ++ rtx op1; ++ machine_mode mode = GET_MODE (x); ++ /* Scale cost by mode size. */ ++ int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); + +- /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. +- We treat it as 4-cycle cost for each instruction +- under performance consideration. */ + switch (code) + { +- case SET: +- /* For 'SET' rtx, we need to return false +- so that it can recursively calculate costs. */ +- return false; +- + case USE: + /* Used in combine.c as a marker. */ + *total = 0; +- break; ++ return true; ++ ++ case CONST_INT: ++ /* When not optimizing for size, we care more about the cost ++ of hot code, and hot code is often in a loop. If a constant ++ operand needs to be forced into a register, we will often be ++ able to hoist the constant load out of the loop, so the load ++ should not contribute to the cost. */ ++ if (outer_code == SET || outer_code == PLUS) ++ *total = satisfies_constraint_Is20 (x) ? 0 : 4; ++ else if (outer_code == AND || outer_code == IOR || outer_code == XOR ++ || outer_code == MINUS) ++ *total = satisfies_constraint_Iu15 (x) ? 0 : 4; ++ else if (outer_code == ASHIFT || outer_code == ASHIFTRT ++ || outer_code == LSHIFTRT) ++ *total = satisfies_constraint_Iu05 (x) ? 0 : 4; ++ else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE ++ || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE) ++ *total = satisfies_constraint_Is16 (x) ? 0 : 4; ++ else ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case CONST: ++ case LO_SUM: ++ case HIGH: ++ case SYMBOL_REF: ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case MEM: ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case SET: ++ op0 = SET_DEST (x); ++ op1 = SET_SRC (x); ++ mode = GET_MODE (op0); ++ /* Scale cost by mode size. */ ++ cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); ++ ++ switch (GET_CODE (op1)) ++ { ++ case REG: ++ case SUBREG: ++ /* Register move and Store instructions. */ ++ if ((REG_P (op0) || MEM_P (op0)) ++ && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = cost; ++ return true; ++ ++ case MEM: ++ /* Load instructions. */ ++ if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = cost; ++ return true; ++ ++ case CONST_INT: ++ /* movi instruction. */ ++ if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) ++ { ++ if (satisfies_constraint_Is20 (op1)) ++ *total = COSTS_N_INSNS (1) - 1; ++ else ++ *total = COSTS_N_INSNS (2); ++ } ++ else ++ *total = cost; ++ return true; ++ ++ case CONST: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ /* la instruction. */ ++ if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) ++ *total = COSTS_N_INSNS (1) - 1; ++ else ++ *total = cost; ++ return true; ++ case VEC_SELECT: ++ *total = cost; ++ return true; ++ ++ default: ++ *total = cost; ++ return true; ++ } ++ ++ case PLUS: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT ++ || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) ++ /* ALU_SHIFT */ ++ *total = COSTS_N_INSNS (2); ++ ++ else if ((GET_CODE (op1) == CONST_INT ++ && satisfies_constraint_Is15 (op1)) ++ || REG_P (op1)) ++ /* ADD instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* ADD instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case MINUS: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT ++ || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) ++ /* ALU_SHIFT */ ++ *total = COSTS_N_INSNS (2); ++ else if ((GET_CODE (op0) == CONST_INT ++ && satisfies_constraint_Is15 (op0)) ++ || REG_P (op0)) ++ /* SUB instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SUB instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case TRUNCATE: ++ /* TRUNCATE and AND behavior is same. */ ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case AND: ++ case IOR: ++ case XOR: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (NDS32_EXT_DSP_P ()) ++ { ++ /* We prefer (and (ior) (ior)) than (ior (and) (and)) for ++ synthetize pk** and insb instruction. */ ++ if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR) ++ return COSTS_N_INSNS (1); ++ ++ if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND) ++ return COSTS_N_INSNS (10); ++ } ++ ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT) ++ *total = COSTS_N_INSNS (2); ++ else if ((GET_CODE (op1) == CONST_INT ++ && satisfies_constraint_Iu15 (op1)) ++ || REG_P (op1)) ++ /* AND, OR, XOR instructions */ ++ *total = COSTS_N_INSNS (1); ++ else if (code == AND || GET_CODE (op0) == NOT) ++ /* BITC instruction */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* AND, OR, XOR instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; + + case MULT: ++ if (GET_MODE (x) == DImode ++ || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND ++ || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) ++ /* MUL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (outer_code == PLUS || outer_code == MINUS) ++ *total = COSTS_N_INSNS (2); ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* MUL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* MUL instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ ++ if (TARGET_MUL_SLOW) ++ *total += COSTS_N_INSNS (4); ++ ++ return true; ++ ++ case LSHIFTRT: ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (outer_code == PLUS || outer_code == MINUS ++ || outer_code == AND || outer_code == IOR ++ || outer_code == XOR) ++ *total = COSTS_N_INSNS (2); ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* SRL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SRL instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case ASHIFT: ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if (outer_code == AND || outer_code == IOR ++ || outer_code == XOR) ++ *total = COSTS_N_INSNS (2); ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* SLL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SLL instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case ASHIFTRT: ++ case ROTATERT: ++ if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) ++ *total = cost; ++ else if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu05 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* ROTR, SLL instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* ROTR, SLL instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case LT: ++ case LTU: ++ if (outer_code == SET) ++ { ++ if ((GET_CODE (XEXP (x, 1)) == CONST_INT ++ && satisfies_constraint_Iu15 (XEXP (x, 1))) ++ || REG_P (XEXP (x, 1))) ++ /* SLT, SLTI instructions */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* SLT, SLT instructions: IMM out of range. */ ++ *total = COSTS_N_INSNS (2); ++ } ++ else ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case EQ: ++ case NE: ++ case GE: ++ case LE: ++ case GT: ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case IF_THEN_ELSE: ++ if (GET_CODE (XEXP (x, 1)) == LABEL_REF) ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ else ++ /* cmovz, cmovn instructions */ ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case LABEL_REF: ++ if (outer_code == IF_THEN_ELSE) ++ /* branch */ ++ *total = COSTS_N_INSNS (2); ++ else ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case ZERO_EXTEND: ++ case SIGN_EXTEND: ++ if (MEM_P (XEXP (x, 0))) ++ /* Using memory access. */ ++ *total = COSTS_N_INSNS (1); ++ else ++ /* Zero extend and sign extend instructions. */ ++ *total = COSTS_N_INSNS (1); ++ return true; ++ ++ case NEG: ++ case NOT: + *total = COSTS_N_INSNS (1); +- break; ++ return true; + + case DIV: + case UDIV: + case MOD: + case UMOD: +- *total = COSTS_N_INSNS (7); +- break; +- +- default: +- *total = COSTS_N_INSNS (1); +- break; +- } ++ *total = COSTS_N_INSNS (20); ++ return true; + +- return true; ++ case CALL: ++ *total = COSTS_N_INSNS (2); ++ return true; + ++ case CLZ: ++ case SMIN: ++ case SMAX: ++ case ZERO_EXTRACT: ++ if (TARGET_EXT_PERF) ++ *total = COSTS_N_INSNS (1); ++ else ++ *total = COSTS_N_INSNS (3); ++ return true; ++ case VEC_SELECT: ++ *total = COSTS_N_INSNS (1); ++ return true; + +-size_cost: +- /* This is section for size cost model. */ ++ default: ++ *total = COSTS_N_INSNS (3); ++ return true; ++ } ++} + ++static bool ++nds32_rtx_costs_size_prefer (rtx x, ++ int code, ++ int outer_code, ++ int opno ATTRIBUTE_UNUSED, ++ int *total) ++{ + /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. + We treat it as 4-byte cost for each instruction + under code size consideration. */ +@@ -118,85 +431,162 @@ + (set X imm20s), use movi, 4-byte cost. + (set X BIG_INT), use sethi/ori, 8-byte cost. */ + if (satisfies_constraint_Is05 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else if (satisfies_constraint_Is20 (x)) +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + else +- *total = COSTS_N_INSNS (2); ++ *total = insn_size_32bit * 2; + } + else if (outer_code == PLUS || outer_code == MINUS) + { + /* Possible addi333/subi333 or subi45/addi45, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + else if (outer_code == ASHIFT) + { + /* Possible slli333, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu03 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT) + { + /* Possible srai45 or srli45, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) +- *total = COSTS_N_INSNS (1) - 2; ++ *total = insn_size_16bit; + else +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + else + { + /* For other cases, simply set it 4-byte cost. */ +- *total = COSTS_N_INSNS (1); ++ *total = insn_size_32bit; + } + break; + + case CONST_DOUBLE: + /* It requires high part and low part processing, set it 8-byte cost. */ +- *total = COSTS_N_INSNS (2); ++ *total = insn_size_32bit * 2; ++ break; ++ ++ case CONST: ++ case SYMBOL_REF: ++ *total = insn_size_32bit * 2; + break; + + default: + /* For other cases, generally we set it 4-byte cost +- and stop resurively traversing. */ +- *total = COSTS_N_INSNS (1); ++ and stop resurively traversing. */ ++ *total = insn_size_32bit; + break; + } + + return true; + } + +-int +-nds32_address_cost_impl (rtx address, +- machine_mode mode ATTRIBUTE_UNUSED, +- addr_space_t as ATTRIBUTE_UNUSED, +- bool speed) ++void ++nds32_init_rtx_costs (void) ++{ ++ rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer; ++ rtx_cost_model.size_prefer = nds32_rtx_costs_size_prefer; ++ ++ if (TARGET_16_BIT) ++ insn_size_16bit = 2; ++ else ++ insn_size_16bit = 4; ++} ++ ++/* This target hook describes the relative costs of RTL expressions. ++ Return 'true' when all subexpressions of x have been processed. ++ Return 'false' to sum the costs of sub-rtx, plus cost of this operation. ++ Refer to gcc/rtlanal.c for more information. */ ++bool ++nds32_rtx_costs_impl (rtx x, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ int outer_code, ++ int opno, ++ int *total, ++ bool speed) ++{ ++ int code = GET_CODE (x); ++ ++ /* According to 'speed', use suitable cost model section. */ ++ if (speed) ++ return rtx_cost_model.speed_prefer(x, code, outer_code, opno, total); ++ else ++ return rtx_cost_model.size_prefer(x, code, outer_code, opno, total); ++} ++ ++ ++int nds32_address_cost_speed_prefer (rtx address) + { + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + +- /* According to 'speed', goto suitable cost model section. */ +- if (speed) +- goto performance_cost; +- else +- goto size_cost; ++ switch (code) ++ { ++ case POST_MODIFY: ++ case POST_INC: ++ case POST_DEC: ++ /* We encourage that rtx contains ++ POST_MODIFY/POST_INC/POST_DEC behavior. */ ++ return COSTS_N_INSNS (1) - 2; ++ ++ case SYMBOL_REF: ++ /* We can have gp-relative load/store for symbol_ref. ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case CONST: ++ /* It is supposed to be the pattern (const (plus symbol_ref const_int)). ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case REG: ++ /* Simply return 4-byte costs. */ ++ return COSTS_N_INSNS (1) - 2; ++ ++ case PLUS: ++ /* We do not need to check if the address is a legitimate address, ++ because this hook is never called with an invalid address. ++ But we better check the range of ++ const_int value for cost, if it exists. */ ++ plus0 = XEXP (address, 0); ++ plus1 = XEXP (address, 1); + +-performance_cost: +- /* This is section for performance cost model. */ ++ if (REG_P (plus0) && CONST_INT_P (plus1)) ++ return COSTS_N_INSNS (1) - 2; ++ else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) ++ return COSTS_N_INSNS (1) - 1; ++ else if (REG_P (plus0) && REG_P (plus1)) ++ return COSTS_N_INSNS (1); + +- /* FALLTHRU, currently we use same cost model as size_cost. */ ++ /* For other 'plus' situation, make it cost 4-byte. */ ++ return COSTS_N_INSNS (1); + +-size_cost: +- /* This is section for size cost model. */ ++ default: ++ break; ++ } ++ ++ return COSTS_N_INSNS (4); ++ ++} ++ ++int nds32_address_cost_speed_fwprop (rtx address) ++{ ++ rtx plus0, plus1; ++ enum rtx_code code; ++ ++ code = GET_CODE (address); + + switch (code) + { +@@ -210,12 +600,12 @@ + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ +- return COSTS_N_INSNS (1); ++ return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ +- return COSTS_N_INSNS (1); ++ return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ +@@ -233,11 +623,78 @@ + { + /* If it is possible to be lwi333/swi333 form, + make it 2-byte cost. */ +- if (satisfies_constraint_Iu05 (plus1)) ++ if (satisfies_constraint_Iu03 (plus1)) + return (COSTS_N_INSNS (1) - 2); + else + return COSTS_N_INSNS (1); + } ++ if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) ++ return COSTS_N_INSNS (1) - 2; ++ else if (REG_P (plus0) && REG_P (plus1)) ++ return COSTS_N_INSNS (1); ++ ++ /* For other 'plus' situation, make it cost 4-byte. */ ++ return COSTS_N_INSNS (1); ++ ++ default: ++ break; ++ } ++ ++ return COSTS_N_INSNS (4); ++} ++ ++ ++int nds32_address_cost_size_prefer (rtx address) ++{ ++ rtx plus0, plus1; ++ enum rtx_code code; ++ ++ code = GET_CODE (address); ++ ++ switch (code) ++ { ++ case POST_MODIFY: ++ case POST_INC: ++ case POST_DEC: ++ /* We encourage that rtx contains ++ POST_MODIFY/POST_INC/POST_DEC behavior. */ ++ return 0; ++ ++ case SYMBOL_REF: ++ /* We can have gp-relative load/store for symbol_ref. ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case CONST: ++ /* It is supposed to be the pattern (const (plus symbol_ref const_int)). ++ Have it 4-byte cost. */ ++ return COSTS_N_INSNS (2); ++ ++ case REG: ++ /* Simply return 4-byte costs. */ ++ return COSTS_N_INSNS (1) - 1; ++ ++ case PLUS: ++ /* We do not need to check if the address is a legitimate address, ++ because this hook is never called with an invalid address. ++ But we better check the range of ++ const_int value for cost, if it exists. */ ++ plus0 = XEXP (address, 0); ++ plus1 = XEXP (address, 1); ++ ++ if (REG_P (plus0) && CONST_INT_P (plus1)) ++ { ++ /* If it is possible to be lwi333/swi333 form, ++ make it 2-byte cost. */ ++ if (satisfies_constraint_Iu03 (plus1)) ++ return (COSTS_N_INSNS (1) - 2); ++ else ++ return COSTS_N_INSNS (1) - 1; ++ } ++ ++ /* (plus (reg) (mult (reg) (const))) */ ++ if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) ++ return (COSTS_N_INSNS (1) - 1); + + /* For other 'plus' situation, make it cost 4-byte. */ + return COSTS_N_INSNS (1); +@@ -247,6 +704,23 @@ + } + + return COSTS_N_INSNS (4); ++ ++} ++ ++int nds32_address_cost_impl (rtx address, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed_p) ++{ ++ if (speed_p) ++ { ++ if (current_pass->tv_id == TV_FWPROP) ++ return nds32_address_cost_speed_fwprop (address); ++ else ++ return nds32_address_cost_speed_prefer (address); ++ } ++ else ++ return nds32_address_cost_size_prefer (address); + } + + /* ------------------------------------------------------------------------ */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-doubleword.md gcc-8.2.0/gcc/config/nds32/nds32-doubleword.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-doubleword.md 2018-05-07 04:09:58.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-doubleword.md 2019-01-25 15:38:32.821242637 +0100 +@@ -136,10 +136,28 @@ + } + ) + ++;; Split move_di pattern when the hard register is odd. ++(define_split ++ [(set (match_operand:DIDF 0 "register_operand" "") ++ (match_operand:DIDF 1 "register_operand" ""))] ++ "(NDS32_IS_GPR_REGNUM (REGNO (operands[0])) ++ && ((REGNO (operands[0]) & 0x1) == 1)) ++ || (NDS32_IS_GPR_REGNUM (REGNO (operands[1])) ++ && ((REGNO (operands[1]) & 0x1) == 1))" ++ [(set (match_dup 2) (match_dup 3)) ++ (set (match_dup 4) (match_dup 5))] ++ { ++ operands[2] = gen_lowpart (SImode, operands[0]); ++ operands[4] = gen_highpart (SImode, operands[0]); ++ operands[3] = gen_lowpart (SImode, operands[1]); ++ operands[5] = gen_highpart (SImode, operands[1]); ++ } ++) ++ + (define_split + [(set (match_operand:DIDF 0 "register_operand" "") + (match_operand:DIDF 1 "const_double_operand" ""))] +- "reload_completed" ++ "flag_pic || reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + { +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-dspext.md gcc-8.2.0/gcc/config/nds32/nds32-dspext.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-dspext.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-dspext.md 2019-01-25 15:38:32.825242648 +0100 +@@ -0,0 +1,5278 @@ ++;; Machine description of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2018 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_expand "mov<mode>" ++ [(set (match_operand:VQIHI 0 "general_operand" "") ++ (match_operand:VQIHI 1 "general_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ /* Need to force register if mem <- !reg. */ ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (<MODE>mode, operands[1]); ++ ++ /* If operands[1] is a large constant and cannot be performed ++ by a single instruction, we need to split it. */ ++ if (GET_CODE (operands[1]) == CONST_VECTOR ++ && !satisfies_constraint_CVs2 (operands[1]) ++ && !satisfies_constraint_CVhi (operands[1])) ++ { ++ HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]); ++ rtx tmp_rtx; ++ ++ tmp_rtx = can_create_pseudo_p () ++ ? gen_reg_rtx (SImode) ++ : simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0); ++ ++ emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode)); ++ convert_move (operands[0], tmp_rtx, false); ++ DONE; ++ } ++ ++ if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1])) ++ { ++ if (nds32_tls_referenced_p (operands [1])) ++ { ++ nds32_expand_tls_move (operands); ++ DONE; ++ } ++ else if (flag_pic) ++ { ++ nds32_expand_pic_move (operands); ++ DONE; ++ } ++ } ++}) ++ ++(define_insn "*mov<mode>" ++ [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$ l,$ l,$ l,$ d, d, r,$ d, r, r, r, *f, *f, r, *f, Q") ++ (match_operand:VQIHI 1 "nds32_vmove_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f, r, *f, Q, *f"))] ++ "NDS32_EXT_DSP_P () ++ && (register_operand(operands[0], <MODE>mode) ++ || register_operand(operands[1], <MODE>mode))" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "mov55\t%0, %1"; ++ case 1: ++ return "ori\t%0, %1, 0"; ++ case 2: ++ case 3: ++ case 4: ++ case 5: ++ return nds32_output_16bit_store (operands, <byte>); ++ case 6: ++ return nds32_output_32bit_store (operands, <byte>); ++ case 7: ++ case 8: ++ case 9: ++ case 10: ++ case 11: ++ return nds32_output_16bit_load (operands, <byte>); ++ case 12: ++ return nds32_output_32bit_load (operands, <byte>); ++ case 13: ++ return "movpi45\t%0, %1"; ++ case 14: ++ return "movi55\t%0, %1"; ++ case 15: ++ return "movi\t%0, %1"; ++ case 16: ++ return "sethi\t%0, hi20(%1)"; ++ case 17: ++ if (TARGET_FPU_SINGLE) ++ return "fcpyss\t%0, %1, %1"; ++ else ++ return "#"; ++ case 18: ++ return "fmtsr\t%1, %0"; ++ case 19: ++ return "fmfsr\t%0, %1"; ++ case 20: ++ return nds32_output_float_load (operands); ++ case 21: ++ return nds32_output_float_store (operands); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore") ++ (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4") ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) ++ ++(define_expand "movv2si" ++ [(set (match_operand:V2SI 0 "general_operand" "") ++ (match_operand:V2SI 1 "general_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ /* Need to force register if mem <- !reg. */ ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (V2SImode, operands[1]); ++}) ++ ++(define_insn "*movv2si" ++ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, r, f") ++ (match_operand:V2SI 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, f, r"))] ++ "NDS32_EXT_DSP_P () ++ && (register_operand(operands[0], V2SImode) ++ || register_operand(operands[1], V2SImode))" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "movd44\t%0, %1"; ++ case 1: ++ /* reg <- const_int, we ask gcc to split instruction. */ ++ return "#"; ++ case 2: ++ /* The memory format is (mem (reg)), ++ we can generate 'lmw.bi' instruction. */ ++ return nds32_output_double (operands, true); ++ case 3: ++ /* We haven't 64-bit load instruction, ++ we split this pattern to two SImode pattern. */ ++ return "#"; ++ case 4: ++ /* The memory format is (mem (reg)), ++ we can generate 'smw.bi' instruction. */ ++ return nds32_output_double (operands, false); ++ case 5: ++ /* We haven't 64-bit store instruction, ++ we split this pattern to two SImode pattern. */ ++ return "#"; ++ case 6: ++ return nds32_output_float_load (operands); ++ case 7: ++ return nds32_output_float_store (operands); ++ case 8: ++ return "fcpysd\t%0, %1, %1"; ++ case 9: ++ return "fmfdr\t%0, %1"; ++ case 10: ++ return "fmtdr\t%1, %0"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown") ++ (set_attr_alternative "length" ++ [ ++ ;; Alternative 0 ++ (if_then_else (match_test "!TARGET_16_BIT") ++ (const_int 4) ++ (const_int 2)) ++ ;; Alternative 1 ++ (const_int 16) ++ ;; Alternative 2 ++ (const_int 4) ++ ;; Alternative 3 ++ (const_int 8) ++ ;; Alternative 4 ++ (const_int 4) ++ ;; Alternative 5 ++ (const_int 8) ++ ;; Alternative 6 ++ (const_int 4) ++ ;; Alternative 7 ++ (const_int 4) ++ ;; Alternative 8 ++ (const_int 4) ++ ;; Alternative 9 ++ (const_int 4) ++ ;; Alternative 10 ++ (const_int 4) ++ ]) ++ (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) ++ ++(define_expand "movmisalign<mode>" ++ [(set (match_operand:VQIHI 0 "general_operand" "") ++ (match_operand:VQIHI 1 "general_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ rtx addr; ++ if (MEM_P (operands[0]) && !REG_P (operands[1])) ++ operands[1] = force_reg (<MODE>mode, operands[1]); ++ ++ if (MEM_P (operands[0])) ++ { ++ addr = force_reg (Pmode, XEXP (operands[0], 0)); ++ emit_insn (gen_unaligned_store<mode> (addr, operands[1])); ++ } ++ else ++ { ++ addr = force_reg (Pmode, XEXP (operands[1], 0)); ++ emit_insn (gen_unaligned_load<mode> (operands[0], addr)); ++ } ++ DONE; ++}) ++ ++(define_expand "unaligned_load<mode>" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_load (operands, <MODE>mode); ++ else ++ emit_insn (gen_unaligned_load_w<mode> (operands[0], gen_rtx_MEM (<MODE>mode, operands[1]))); ++ DONE; ++}) ++ ++(define_insn "unaligned_load_w<mode>" ++ [(set (match_operand:VQIHI 0 "register_operand" "= r") ++ (unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ return nds32_output_lmw_single_word (operands); ++} ++ [(set_attr "type" "load") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "unaligned_store<mode>" ++ [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r")) ++ (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_ISA_V3M) ++ nds32_expand_unaligned_store (operands, <MODE>mode); ++ else ++ emit_insn (gen_unaligned_store_w<mode> (gen_rtx_MEM (<MODE>mode, operands[0]), operands[1])); ++ DONE; ++}) ++ ++(define_insn "unaligned_store_w<mode>" ++ [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand" "=Umw") ++ (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ return nds32_output_smw_single_word (operands); ++} ++ [(set_attr "type" "store") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "<uk>add<mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>add<bits> %0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "<uk>adddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (all_plus:DI (match_operand:DI 1 "register_operand" " r") ++ (match_operand:DI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>add64 %0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "raddv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (ashiftrt:V4HI ++ (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "radd8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++ ++(define_insn "uraddv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (lshiftrt:V4HI ++ (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "uradd8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "raddv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (ashiftrt:V2SI ++ (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "radd16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "uraddv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (lshiftrt:V2SI ++ (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "uradd16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "radddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (ashiftrt:TI ++ (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "radd64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++ ++(define_insn "uradddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "uradd64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "<uk>sub<mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>sub<bits> %0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "<uk>subdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (all_minus:DI (match_operand:DI 1 "register_operand" " r") ++ (match_operand:DI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<uk>sub64 %0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++(define_insn "rsubv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (ashiftrt:V4HI ++ (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "rsub8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "ursubv4qi3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (truncate:V4QI ++ (lshiftrt:V4HI ++ (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) ++ (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ursub8\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rsubv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (ashiftrt:V2SI ++ (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "rsub16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "ursubv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (truncate:V2HI ++ (lshiftrt:V2SI ++ (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) ++ (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ursub16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rsubdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (ashiftrt:TI ++ (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "rsub64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "ursubdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) ++ (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ursub64\t%0, %1, %2" ++ [(set_attr "type" "dalu64") ++ (set_attr "length" "4")]) ++ ++(define_expand "cras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "cras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "cras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "cras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "cras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "kcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "kcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "kcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "kcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "kcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "ukcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "ukcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "ukcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "ukcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "ukcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "crsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "crsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "crsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "crsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "crsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "kcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "kcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "kcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "kcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (ss_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (ss_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "kcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "ukcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "ukcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "ukcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "ukcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (us_minus:HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (us_plus:HI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "ukcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "rcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "rcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "rcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "urcras16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "urcras16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "urcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "urcras16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "urcras16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "rcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "rcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "rcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (minus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (ashiftrt:SI ++ (plus:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "urcrsa16_1" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_insn "urcrsa16_1_le" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "urcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_insn "urcrsa16_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (minus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (const_int 1)))) ++ (vec_duplicate:V2HI ++ (truncate:HI ++ (lshiftrt:SI ++ (plus:SI ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (zero_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (const_int 1)))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "urcrsa16\t%0, %1, %2" ++ [(set_attr "type" "dalu")] ++) ++ ++(define_expand "<shift>v2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "") ++ (shifts:V2HI (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:SI 2 "nds32_rimm4u_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (operands[2] == const0_rtx) ++ { ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++ } ++}) ++ ++(define_insn "*ashlv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ slli16\t%0, %1, %2 ++ sll16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "kslli16" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (ss_ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ kslli16\t%0, %1, %2 ++ ksll16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "*ashrv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srai16\t%0, %1, %2 ++ sra16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "sra16_round" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] ++ UNSPEC_ROUND))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srai16.u\t%0, %1, %2 ++ sra16.u\t%0, %1, %2" ++ [(set_attr "type" "daluround,daluround") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "*lshrv2hi3" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srli16\t%0, %1, %2 ++ srl16\t%0, %1, %2" ++ [(set_attr "type" "dalu,dalu") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "srl16_round" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] ++ UNSPEC_ROUND))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srli16.u\t%0, %1, %2 ++ srl16.u\t%0, %1, %2" ++ [(set_attr "type" "daluround,daluround") ++ (set_attr "length" " 4, 4")]) ++ ++(define_insn "kslra16" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (if_then_else:V2HI ++ (lt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 0)) ++ (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") ++ (neg:SI (match_dup 2))) ++ (ashift:V2HI (match_dup 1) ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "kslra16\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "kslra16_round" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (if_then_else:V2HI ++ (lt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 0)) ++ (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") ++ (neg:SI (match_dup 2)))] ++ UNSPEC_ROUND) ++ (ashift:V2HI (match_dup 1) ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "kslra16.u\t%0, %1, %2" ++ [(set_attr "type" "daluround") ++ (set_attr "length" "4")]) ++ ++(define_insn "cmpeq<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "cmpeq<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "scmplt<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "scmplt<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "scmple<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "scmple<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "ucmplt<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "ucmplt<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "ucmple<bits>" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r"))] ++ UNSPEC_VEC_COMPARE))] ++ "NDS32_EXT_DSP_P ()" ++ "ucmple<bits>\t%0, %1, %2" ++ [(set_attr "type" "dcmp") ++ (set_attr "length" "4")]) ++ ++(define_insn "sclip16" ++ [(set (match_operand:V2HI 0 "register_operand" "= r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] ++ UNSPEC_CLIPS))] ++ "NDS32_EXT_DSP_P ()" ++ "sclip16\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")]) ++ ++(define_insn "uclip16" ++ [(set (match_operand:V2HI 0 "register_operand" "= r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] ++ UNSPEC_CLIP))] ++ "NDS32_EXT_DSP_P ()" ++ "uclip16\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")]) ++ ++(define_insn "khm16" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:V2HI 2 "register_operand" " r")] ++ UNSPEC_KHM))] ++ "NDS32_EXT_DSP_P ()" ++ "khm16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "khmx16" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") ++ (match_operand:V2HI 2 "register_operand" " r")] ++ UNSPEC_KHMX))] ++ "NDS32_EXT_DSP_P ()" ++ "khmx16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "vec_setv4qi" ++ [(match_operand:V4QI 0 "register_operand" "") ++ (match_operand:QI 1 "register_operand" "") ++ (match_operand:SI 2 "immediate_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ HOST_WIDE_INT pos = INTVAL (operands[2]); ++ if (pos > 4) ++ gcc_unreachable (); ++ HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; ++ emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1], ++ operands[0], GEN_INT (elem))); ++ DONE; ++}) ++ ++(define_expand "insb" ++ [(match_operand:V4QI 0 "register_operand" "") ++ (match_operand:V4QI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:SI 3 "const_int_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0) ++ gcc_unreachable (); ++ ++ rtx src = gen_reg_rtx (QImode); ++ ++ convert_move (src, operands[2], false); ++ ++ HOST_WIDE_INT selector_index; ++ /* Big endian need reverse index. */ ++ if (TARGET_BIG_ENDIAN) ++ selector_index = 4 - INTVAL (operands[3]) - 1; ++ else ++ selector_index = INTVAL (operands[3]); ++ rtx selector = gen_int_mode (1 << selector_index, SImode); ++ emit_insn (gen_vec_setv4qi_internal (operands[0], src, ++ operands[1], selector)); ++ DONE; ++}) ++ ++(define_expand "insvsi" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "const_int_operand" "") ++ (match_operand:SI 2 "nds32_insv_operand" "")) ++ (match_operand:SI 3 "register_operand" ""))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (INTVAL (operands[1]) != 8) ++ FAIL; ++} ++ [(set_attr "type" "dinsb") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "insvsi_internal" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") ++ (const_int 8) ++ (match_operand:SI 1 "nds32_insv_operand" "i")) ++ (match_operand:SI 2 "register_operand" "r"))] ++ "NDS32_EXT_DSP_P ()" ++ "insb\t%0, %2, %v1" ++ [(set_attr "type" "dinsb") ++ (set_attr "length" "4")]) ++ ++(define_insn "insvsiqi_internal" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") ++ (const_int 8) ++ (match_operand:SI 1 "nds32_insv_operand" "i")) ++ (zero_extend:SI (match_operand:QI 2 "register_operand" "r")))] ++ "NDS32_EXT_DSP_P ()" ++ "insb\t%0, %2, %v1" ++ [(set_attr "type" "dinsb") ++ (set_attr "length" "4")]) ++ ++;; Intermedium pattern for synthetize insvsiqi_internal ++;; v0 = ((v1 & 0xff) << 8) ++(define_insn_and_split "and0xff_s8" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 8)) ++ (const_int 65280)))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (SImode); ++ emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode))); ++ emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode))); ++ DONE; ++}) ++ ++;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) | 0xff0000) ++(define_insn_and_split "insbsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0") ++ (const_int -16711681)) ++ (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16)) ++ (const_int 16711680))))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (SImode); ++ emit_move_insn (tmp, operands[1]); ++ emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2])); ++ emit_move_insn (operands[0], tmp); ++ DONE; ++}) ++ ++;; v0 = (v1 & 0xff00ffff) | v2 ++(define_insn_and_split "ior_and0xff00ffff_reg" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int -16711681)) ++ (match_operand:SI 2 "register_operand" "r")))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (SImode); ++ emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode))); ++ emit_insn (gen_iorsi3 (operands[0], tmp, operands[2])); ++ DONE; ++}) ++ ++(define_insn "vec_setv4qi_internal" ++ [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") ++ (vec_merge:V4QI ++ (vec_duplicate:V4QI ++ (match_operand:QI 1 "register_operand" " r, r, r, r")) ++ (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") ++ (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "insb\t%0, %1, 3", ++ "insb\t%0, %1, 2", ++ "insb\t%0, %1, 1", ++ "insb\t%0, %1, 0" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "insb\t%0, %1, 0", ++ "insb\t%0, %1, 1", ++ "insb\t%0, %1, 2", ++ "insb\t%0, %1, 3" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dinsb") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_setv4qi_internal_vec" ++ [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") ++ (vec_merge:V4QI ++ (vec_duplicate:V4QI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r, r, r, r") ++ (parallel [(const_int 0)]))) ++ (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") ++ (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ insb\t%0, %1, 0 ++ insb\t%0, %1, 1 ++ insb\t%0, %1, 2 ++ insb\t%0, %1, 3" ++ [(set_attr "type" "dinsb") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergev4qi_and_cv0_1" ++ [(set (match_operand:V4QI 0 "register_operand" "=$l,r") ++ (vec_merge:V4QI ++ (vec_duplicate:V4QI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " l,r") ++ (parallel [(const_int 0)]))) ++ (const_vector:V4QI [ ++ (const_int 0) ++ (const_int 0) ++ (const_int 0) ++ (const_int 0)]) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeb33\t%0, %1 ++ zeb\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergev4qi_and_cv0_2" ++ [(set (match_operand:V4QI 0 "register_operand" "=$l,r") ++ (vec_merge:V4QI ++ (const_vector:V4QI [ ++ (const_int 0) ++ (const_int 0) ++ (const_int 0) ++ (const_int 0)]) ++ (vec_duplicate:V4QI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " l,r") ++ (parallel [(const_int 0)]))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeb33\t%0, %1 ++ zeb\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergeqi_and_cv0_1" ++ [(set (match_operand:V4QI 0 "register_operand" "=$l,r") ++ (vec_merge:V4QI ++ (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) ++ (const_vector:V4QI [ ++ (const_int 0) ++ (const_int 0) ++ (const_int 0) ++ (const_int 0)]) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeb33\t%0, %1 ++ zeb\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergeqi_and_cv0_2" ++ [(set (match_operand:V4QI 0 "register_operand" "=$l,r") ++ (vec_merge:V4QI ++ (const_vector:V4QI [ ++ (const_int 0) ++ (const_int 0) ++ (const_int 0) ++ (const_int 0)]) ++ (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeb33\t%0, %1 ++ zeb\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_expand "vec_setv2hi" ++ [(match_operand:V2HI 0 "register_operand" "") ++ (match_operand:HI 1 "register_operand" "") ++ (match_operand:SI 2 "immediate_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ HOST_WIDE_INT pos = INTVAL (operands[2]); ++ if (pos > 2) ++ gcc_unreachable (); ++ HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; ++ emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1], ++ operands[0], GEN_INT (elem))); ++ DONE; ++}) ++ ++(define_insn "vec_setv2hi_internal" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (match_operand:HI 1 "register_operand" " r, r")) ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "pkbb16\t%0, %1, %2", ++ "pktb16\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "pktb16\t%0, %2, %1", ++ "pkbb16\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergev2hi_and_cv0_1" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " l,r") ++ (parallel [(const_int 0)]))) ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergev2hi_and_cv0_2" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " l,r") ++ (parallel [(const_int 0)]))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergehi_and_cv0_1" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_mergehi_and_cv0_2" ++ [(set (match_operand:V2HI 0 "register_operand" "=$l,r") ++ (vec_merge:V2HI ++ (const_vector:V2HI [ ++ (const_int 0) ++ (const_int 0)]) ++ (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ zeh33\t%0, %1 ++ zeh\t%0, %1" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_expand "pkbb" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1), GEN_INT (1))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (0), GEN_INT (0))); ++ } ++ DONE; ++}) ++ ++(define_insn "pkbbsi_1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 65535)) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkbbsi_2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16)) ++ (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int 65535))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkbbsi_3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) ++ (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkbbsi_4" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 16)) ++ (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "pkbb16\t%0, %2, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff) ++(define_insn "pktbsi_1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int -65536)) ++ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pktbsi_2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") ++ (const_int -65536)) ++ (and:SI (match_operand:SI 2 "register_operand" "r") ++ (const_int 65535))))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "pktbsi_3" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") ++ (const_int 16 ) ++ (const_int 0)) ++ (match_operand:SI 1 "register_operand" " r"))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pktbsi_4" ++ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") ++ (const_int 16 ) ++ (const_int 0)) ++ (zero_extend:SI (match_operand:HI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "pktb16\t%0, %0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "pkttsi" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI (and:SI (match_operand:SI 1 "register_operand" " r") ++ (const_int -65536)) ++ (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++ "pktt16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "pkbt" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1), GEN_INT (0))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (0), GEN_INT (1))); ++ } ++ DONE; ++}) ++ ++(define_expand "pktt" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (0), GEN_INT (0))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (1), GEN_INT (1))); ++ } ++ DONE; ++}) ++ ++(define_expand "pktb" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V2HI 1 "register_operand") ++ (match_operand:V2HI 2 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (0), GEN_INT (1))); ++ } ++ else ++ { ++ emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], ++ GEN_INT (2), GEN_INT (1), GEN_INT (0))); ++ } ++ DONE; ++}) ++ ++(define_insn "vec_mergerr" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (match_operand:HI 1 "register_operand" " r, r")) ++ (vec_duplicate:V2HI ++ (match_operand:HI 2 "register_operand" " r, r")) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ pkbb16\t%0, %2, %1 ++ pkbb16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "vec_merge" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r") ++ (vec_merge:V2HI ++ (match_operand:V2HI 1 "register_operand" " r, r") ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "pktb16\t%0, %1, %2", ++ "pktb16\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "pktb16\t%0, %2, %1", ++ "pktb16\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergerv" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (match_operand:HI 1 "register_operand" " r, r, r, r")) ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ pkbb16\t%0, %2, %1 ++ pktb16\t%0, %2, %1 ++ pkbb16\t%0, %1, %2 ++ pkbt16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergevr" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) ++ (vec_duplicate:V2HI ++ (match_operand:HI 2 "register_operand" " r, r, r, r")) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ pkbb16\t%0, %2, %1 ++ pkbt16\t%0, %2, %1 ++ pkbb16\t%0, %1, %2 ++ pktb16\t%0, %1, %2" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_mergevv" ++ [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r, r, r, r, r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r, r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")]))) ++ (vec_duplicate:V2HI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r, r, r, r, r") ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")]))) ++ (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "pktt16\t%0, %1, %2", ++ "pktb16\t%0, %1, %2", ++ "pkbb16\t%0, %1, %2", ++ "pkbt16\t%0, %1, %2", ++ "pktt16\t%0, %2, %1", ++ "pkbt16\t%0, %2, %1", ++ "pkbb16\t%0, %2, %1", ++ "pktb16\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "pkbb16\t%0, %2, %1", ++ "pktb16\t%0, %2, %1", ++ "pktt16\t%0, %2, %1", ++ "pkbt16\t%0, %2, %1", ++ "pkbb16\t%0, %1, %2", ++ "pkbt16\t%0, %1, %2", ++ "pktt16\t%0, %1, %2", ++ "pktb16\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "vec_extractv4qi" ++ [(set (match_operand:QI 0 "register_operand" "") ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" "") ++ (parallel [(match_operand:SI 2 "const_int_operand" "")])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ if (INTVAL (operands[2]) != 0 ++ && INTVAL (operands[2]) != 1 ++ && INTVAL (operands[2]) != 2 ++ && INTVAL (operands[2]) != 3) ++ gcc_unreachable (); ++ ++ if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) ++ FAIL; ++}) ++ ++(define_insn "vec_extractv4qi0" ++ [(set (match_operand:QI 0 "register_operand" "=l,r,r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "zeb33\t%0, %1"; ++ case 1: ++ return "zeb\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load (operands, 1); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_extractv4qi0_ze" ++ [(set (match_operand:SI 0 "register_operand" "=l,r,r") ++ (zero_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "zeb33\t%0, %1"; ++ case 1: ++ return "zeb\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load (operands, 1); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_extractv4qi0_se" ++ [(set (match_operand:SI 0 "register_operand" "=l,r,r") ++ (sign_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seb33\t%0, %1"; ++ case 1: ++ return "seb\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_s (operands, 1); ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qi1" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_rotrv4qi_1 (tmp, operands[1])); ++ emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qi2" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_rotrv4qi_2 (tmp, operands[1])); ++ emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qi3" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_rotrv4qi_3 (tmp, operands[1])); ++ emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "vec_extractv4qi3_se" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (sign_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " 0,r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 24 ++ srai\t%0, %1, 24" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv4qi3_ze" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (zero_extend:SI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " 0,r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srli45\t%0, 24 ++ srli\t%0, %1, 24" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn_and_split "vec_extractv4qihi0" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi0 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qihi1" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi1 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi2 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "vec_extractv4qihi3" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx tmp = gen_reg_rtx (QImode); ++ emit_insn (gen_vec_extractv4qi3 (tmp, operands[1])); ++ emit_insn (gen_extendqihi2 (operands[0], tmp)); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_expand "vec_extractv2hi" ++ [(set (match_operand:HI 0 "register_operand" "") ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" "") ++ (parallel [(match_operand:SI 2 "const_int_operand" "")])))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (INTVAL (operands[2]) != 0 ++ && INTVAL (operands[2]) != 1) ++ gcc_unreachable (); ++ ++ if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) ++ FAIL; ++}) ++ ++(define_insn "vec_extractv2hi0" ++ [(set (match_operand:HI 0 "register_operand" "=$l,r,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seh33\t%0, %1"; ++ case 1: ++ return "seh\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_s (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "vec_extractv2hi0_ze" ++ [(set (match_operand:SI 0 "register_operand" "=$l, r,$ l, *r") ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l, r, U33, m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "zeh33\t%0, %1"; ++ case 1: ++ return "zeh\t%0, %1"; ++ case 2: ++ return nds32_output_16bit_load (operands, 2); ++ case 3: ++ return nds32_output_32bit_load (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load,load") ++ (set_attr "length" " 2, 4, 2, 4")]) ++ ++(define_insn "vec_extractv2hi0_se" ++ [(set (match_operand:SI 0 "register_operand" "=$l, r, r") ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seh33\t%0, %1"; ++ case 1: ++ return "seh\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_s (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "vec_extractv2hi0_be" ++ [(set (match_operand:HI 0 "register_operand" "=$d,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 0)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 16 ++ srai\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1" ++ [(set (match_operand:HI 0 "register_operand" "=$d,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 16 ++ srai\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1_se" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srai45\t%0, 16 ++ srai\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1_ze" ++ [(set (match_operand:SI 0 "register_operand" "=$d,r") ++ (zero_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " 0,r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "@ ++ srli45\t%0, 16 ++ srli\t%0, %1, 16" ++ [(set_attr "type" "alu,alu") ++ (set_attr "length" " 2, 4")]) ++ ++(define_insn "vec_extractv2hi1_be" ++ [(set (match_operand:HI 0 "register_operand" "=$l,r,r") ++ (vec_select:HI ++ (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") ++ (parallel [(const_int 1)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ return "seh33\t%0, %1"; ++ case 1: ++ return "seh\t%0, %1"; ++ case 2: ++ return nds32_output_32bit_load_s (operands, 2); ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "alu,alu,load") ++ (set_attr "length" " 2, 4, 4")]) ++ ++(define_insn "<su>mul16" ++ [(set (match_operand:V2SI 0 "register_operand" "=r") ++ (mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r")) ++ (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mul16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mulx16" ++ [(set (match_operand:V2SI 0 "register_operand" "=r") ++ (vec_merge:V2SI ++ (vec_duplicate:V2SI ++ (mult:SI ++ (extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))))) ++ (vec_duplicate:V2SI ++ (mult:SI ++ (extend:SI ++ (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mulx16\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv2hi_1" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_select:V2HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1) (const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv2hi_1_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_select:V2HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_1" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 8" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_1_be" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 8" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_2" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_2_be" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 16" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_3" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 24" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "rotrv4qi_3_be" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "rotri\t%0, %1, 24" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "v4qi_dup_10" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "pkbb\t%0, %1, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "v4qi_dup_32" ++ [(set (match_operand:V4QI 0 "register_operand" "=r") ++ (vec_select:V4QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "pktt\t%0, %1, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "vec_unpacks_lo_v4qi" ++ [(match_operand:V2HI 0 "register_operand" "=r") ++ (match_operand:V4QI 1 "register_operand" " r")] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++{ ++ emit_insn (gen_sunpkd810 (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "sunpkd810" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd810_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd810_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd810_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd810_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd810_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 2)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd810\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "sunpkd820" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd820_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd820_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd820_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 2)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd820_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd820_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "sunpkd830" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd830_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd830_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd830_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd830_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd830_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd830\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "sunpkd831" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_sunpkd831_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_insn "<zs>unpkd831_imp" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 1)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd831_imp_inv" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 3)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd831_imp_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 2)])))) ++ (const_int 1)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "<zs>unpkd831_imp_inv_be" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])))) ++ (vec_duplicate:V2HI ++ (extend:HI ++ (vec_select:QI ++ (match_dup 1) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "<zs>unpkd831\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_expand "zunpkd810" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd810_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "zunpkd820" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd820_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "zunpkd830" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd830_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "zunpkd831" ++ [(match_operand:V2HI 0 "register_operand") ++ (match_operand:V4QI 1 "register_operand")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1])); ++ else ++ emit_insn (gen_zunpkd831_imp (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "smbb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1))); ++ else ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (0), GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smbt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (0))); ++ else ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (0), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_expand "smtt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (0), GEN_INT (0))); ++ else ++ emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], ++ GEN_INT (1), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "mulhisi3v" ++ [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smtt\t%0, %1, %2", ++ "smbt\t%0, %2, %1", ++ "smbb\t%0, %1, %2", ++ "smbt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smbb\t%0, %1, %2", ++ "smbt\t%0, %1, %2", ++ "smtt\t%0, %1, %2", ++ "smbt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmabb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (1), GEN_INT (1), ++ operands[1])); ++ else ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (0), GEN_INT (0), ++ operands[1])); ++ DONE; ++}) ++ ++(define_expand "kmabt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (1), GEN_INT (0), ++ operands[1])); ++ else ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (0), GEN_INT (1), ++ operands[1])); ++ DONE; ++}) ++ ++(define_expand "kmatt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (0), GEN_INT (0), ++ operands[1])); ++ else ++ emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], ++ GEN_INT (1), GEN_INT (1), ++ operands[1])); ++ DONE; ++}) ++ ++(define_insn "kma_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) ++ (match_operand:SI 5 "register_operand" " 0, 0, 0, 0")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "kmatt\t%0, %1, %2", ++ "kmabt\t%0, %2, %1", ++ "kmabb\t%0, %1, %2", ++ "kmabt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "kmabb\t%0, %1, %2", ++ "kmabt\t%0, %1, %2", ++ "kmatt\t%0, %1, %2", ++ "kmabt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smds" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smds_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_smds_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "smds_le" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smds_be" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smdrs" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_smdrs_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "smdrs_le" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smdrs_be" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smxdsv" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:V2HI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2])); ++ else ++ emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++ ++(define_expand "smxdsv_le" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_expand "smxdsv_be" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++}) ++ ++(define_insn "smal1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal4" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" " r") ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal5" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal6" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)])))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal7" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smal8" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)])))) ++ (match_operand:DI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "smal\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; We need this dummy pattern for smal ++(define_insn_and_split "extendsidi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ rtx high_part_dst, low_part_dst; ++ ++ low_part_dst = nds32_di_low_part_subreg (operands[0]); ++ high_part_dst = nds32_di_high_part_subreg (operands[0]); ++ ++ emit_move_insn (low_part_dst, operands[1]); ++ emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++;; We need this dummy pattern for usmar64/usmsr64 ++(define_insn_and_split "zero_extendsidi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ rtx high_part_dst, low_part_dst; ++ ++ low_part_dst = nds32_di_low_part_subreg (operands[0]); ++ high_part_dst = nds32_di_high_part_subreg (operands[0]); ++ ++ emit_move_insn (low_part_dst, operands[1]); ++ emit_move_insn (high_part_dst, const0_rtx); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "extendhidi2" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))] ++ "NDS32_EXT_DSP_P ()" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ rtx high_part_dst, low_part_dst; ++ ++ low_part_dst = nds32_di_low_part_subreg (operands[0]); ++ high_part_dst = nds32_di_high_part_subreg (operands[0]); ++ ++ ++ emit_insn (gen_extendhisi2 (low_part_dst, operands[1])); ++ emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_insn "extendqihi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (sign_extend:HI (match_operand:QI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "sunpkd820\t%0, %1" ++ [(set_attr "type" "dpack") ++ (set_attr "length" "4")]) ++ ++(define_insn "smulsi3_highpart" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "smmul\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "smmul_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [(mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))] ++ UNSPEC_ROUND) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "smmul.u\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmac" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmac\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmac_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [(mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] ++ UNSPEC_ROUND) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmac.u\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmsb" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmsb\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmmsb_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [(mult:DI ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] ++ UNSPEC_ROUND) ++ (const_int 32)))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmmsb.u\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kwmmul" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (ss_mult:DI ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) ++ (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2))) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "kwmmul\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "kwmmul_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI [ ++ (ss_mult:DI ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) ++ (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))] ++ UNSPEC_ROUND) ++ (const_int 32))))] ++ "NDS32_EXT_DSP_P ()" ++ "kwmmul.u\t%0, %1, %2" ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "smmwb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); ++ else ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smmwt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0))); ++ else ++ emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "smulhisi3_highpart_1" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smmwt\t%0, %1, %2", ++ "smmwb\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smmwb\t%0, %1, %2", ++ "smmwt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_insn "smulhisi3_highpart_2" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r, r"))) ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smmwt\t%0, %1, %2", ++ "smmwb\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smmwb\t%0, %1, %2", ++ "smmwt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "smmwb_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); ++ else ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smmwt_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); ++ else ++ emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "smmw_round_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI ++ [(mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] ++ UNSPEC_ROUND) ++ (const_int 16))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smmwt.u\t%0, %1, %2", ++ "smmwb.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smmwb.u\t%0, %1, %2", ++ "smmwt.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmul") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmmawb" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ else ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ DONE; ++}) ++ ++(define_expand "kmmawt" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ else ++ emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ DONE; ++}) ++ ++(define_insn "kmmaw_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (ss_plus:SI ++ (match_operand:SI 4 "register_operand" " 0, 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))) ++ (const_int 16)))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "kmmawt\t%0, %1, %2", ++ "kmmawb\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "kmmawb\t%0, %1, %2", ++ "kmmawt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmmawb_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ else ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ DONE; ++} ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ ++(define_expand "kmmawt_round" ++ [(match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "") ++ (match_operand:SI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); ++ else ++ emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "kmmaw_round_internal" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (ss_plus:SI ++ (match_operand:SI 4 "register_operand" " 0, 0") ++ (truncate:SI ++ (lshiftrt:DI ++ (unspec:DI ++ [(mult:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] ++ UNSPEC_ROUND) ++ (const_int 16)))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "kmmawt.u\t%0, %1, %2", ++ "kmmawb.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "kmmawb.u\t%0, %1, %2", ++ "kmmawt.u\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smalbb" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (1), GEN_INT (1))); ++ else ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (0), GEN_INT (0))); ++ DONE; ++}) ++ ++(define_expand "smalbt" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (1), GEN_INT (0))); ++ else ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (0), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_expand "smaltt" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" "") ++ (match_operand:V2HI 3 "register_operand" "")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (0), GEN_INT (0))); ++ else ++ emit_insn (gen_smaddhidi (operands[0], operands[2], ++ operands[3], operands[1], ++ GEN_INT (1), GEN_INT (1))); ++ DONE; ++}) ++ ++(define_insn "smaddhidi" ++ [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") ++ (plus:DI ++ (match_operand:DI 3 "register_operand" " 0, 0, 0, 0") ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1", ++ "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2", ++ "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smaddhidi2" ++ [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") ++ (plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) ++ (sign_extend:DI ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r, r, r, r") ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) ++ (match_operand:DI 3 "register_operand" " 0, 0, 0, 0")))] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ { ++ const char *pats[] = { "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1", ++ "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2" }; ++ return pats[which_alternative]; ++ } ++ else ++ { ++ const char *pats[] = { "smalbb\t%0, %1, %2", ++ "smalbt\t%0, %1, %2", ++ "smaltt\t%0, %1, %2", ++ "smalbt\t%0, %2, %1" }; ++ return pats[which_alternative]; ++ } ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smalda1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_expand "smalds1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_insn "smalda1_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smalda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smalds1_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smalds\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smalda1_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smalda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smalds1_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smalds\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smaldrs3" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_insn "smaldrs3_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smaldrs\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smaldrs3_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smaldrs\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_expand "smalxda1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_expand "smalxds1" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:DI 1 "register_operand" "") ++ (match_operand:V2HI 2 "register_operand" " r") ++ (match_operand:V2HI 3 "register_operand" " r")] ++ "NDS32_EXT_DSP_P ()" ++{ ++ if (TARGET_BIG_ENDIAN) ++ emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3])); ++ else ++ emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3])); ++ DONE; ++}) ++ ++(define_insn "smalxd<add_sub>1_le" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)]))))))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "smalxd<add_sub>\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++ ++(define_insn "smalxd<add_sub>1_be" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (plus_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)]))))))))] ++ "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" ++ "smalxd<add_sub>\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smslda1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))))) ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smslda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "smslxda1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))))) ++ (sign_extend:DI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "smslxda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; mada for synthetize smalda ++(define_insn_and_split "mada1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx result0 = gen_reg_rtx (SImode); ++ rtx result1 = gen_reg_rtx (SImode); ++ emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], ++ operands[3], operands[4])); ++ emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], ++ operands[5], operands[6])); ++ emit_insn (gen_addsi3 (operands[0], result0, result1)); ++ DONE; ++}) ++ ++(define_insn_and_split "mada2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 1)] ++{ ++ rtx result0 = gen_reg_rtx (SImode); ++ rtx result1 = gen_reg_rtx (SImode); ++ emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], ++ operands[3], operands[4])); ++ emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], ++ operands[6], operands[5])); ++ emit_insn (gen_addsi3 (operands[0], result0, result1)); ++ DONE; ++}) ++ ++;; sms for synthetize smalds ++(define_insn_and_split "sms1" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () ++ && (!reload_completed ++ || !nds32_need_split_sms_p (operands[3], operands[4], ++ operands[5], operands[6]))" ++ ++{ ++ return nds32_output_sms (operands[3], operands[4], ++ operands[5], operands[6]); ++} ++ "NDS32_EXT_DSP_P () ++ && !reload_completed ++ && nds32_need_split_sms_p (operands[3], operands[4], ++ operands[5], operands[6])" ++ [(const_int 1)] ++{ ++ nds32_split_sms (operands[0], operands[1], operands[2], ++ operands[3], operands[4], ++ operands[5], operands[6]); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "sms2" ++ [(set (match_operand:SI 0 "register_operand" "= r") ++ (minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] ++ "NDS32_EXT_DSP_P () ++ && (!reload_completed ++ || !nds32_need_split_sms_p (operands[3], operands[4], ++ operands[6], operands[5]))" ++{ ++ return nds32_output_sms (operands[3], operands[4], ++ operands[6], operands[5]); ++} ++ "NDS32_EXT_DSP_P () ++ && !reload_completed ++ && nds32_need_split_sms_p (operands[3], operands[4], ++ operands[6], operands[5])" ++ [(const_int 1)] ++{ ++ nds32_split_sms (operands[0], operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6], operands[5]); ++ DONE; ++} ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmda\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmxda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" "r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" "r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 1) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmxda\t%0, %1, %2" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmada" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmada\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmada2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmada\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmaxda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_plus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmaxda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmads" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmads\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmadrs" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmadrs\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmaxds" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmaxds\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmsda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 0)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmsda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmsxda" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI ++ (match_operand:SI 1 "register_operand" " 0") ++ (ss_minus:SI ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_operand:V2HI 3 "register_operand" " r") ++ (parallel [(const_int 0)])))) ++ (mult:SI ++ (sign_extend:SI (vec_select:HI ++ (match_dup 2) ++ (parallel [(const_int 0)]))) ++ (sign_extend:SI (vec_select:HI ++ (match_dup 3) ++ (parallel [(const_int 1)])))))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmsxda\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; smax[8|16] and umax[8|16] ++(define_insn "<opcode><mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<opcode><bits>\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++;; smin[8|16] and umin[8|16] ++(define_insn "<opcode><mode>3" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r") ++ (match_operand:VQIHI 2 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "<opcode><bits>\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "<opcode><mode>3_bb" ++ [(set (match_operand:<VELT> 0 "register_operand" "=r") ++ (sumin_max:<VELT> (vec_select:<VELT> ++ (match_operand:VQIHI 1 "register_operand" " r") ++ (parallel [(const_int 0)])) ++ (vec_select:<VELT> ++ (match_operand:VQIHI 2 "register_operand" " r") ++ (parallel [(const_int 0)]))))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "<opcode><bits>\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode><mode>3_tt" ++ [(set (match_operand:<VELT> 0 "register_operand" "=r") ++ (sumin_max:<VELT> (vec_select:<VELT> ++ (match_operand:VQIHI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:<VELT> ++ (match_operand:VQIHI 2 "register_operand" " r") ++ (parallel [(const_int 1)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ rtx tmp = gen_reg_rtx (<MODE>mode); ++ emit_insn (gen_<opcode><mode>3 (tmp, operands[1], operands[2])); ++ emit_insn (gen_rotr<mode>_1 (tmp, tmp)); ++ emit_move_insn (operands[0], simplify_gen_subreg (<VELT>mode, tmp, <MODE>mode, 0)); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode>v4qi3_22" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (sumin_max:QI (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 2)])) ++ (vec_select:QI ++ (match_operand:V4QI 2 "register_operand" " r") ++ (parallel [(const_int 2)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2])); ++ emit_insn (gen_rotrv4qi_2 (tmp, tmp)); ++ emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode>v4qi3_33" ++ [(set (match_operand:QI 0 "register_operand" "=r") ++ (sumin_max:QI (vec_select:QI ++ (match_operand:V4QI 1 "register_operand" " r") ++ (parallel [(const_int 3)])) ++ (vec_select:QI ++ (match_operand:V4QI 2 "register_operand" " r") ++ (parallel [(const_int 3)]))))] ++ "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ rtx tmp = gen_reg_rtx (V4QImode); ++ emit_insn (gen_<opcode>v4qi3 (tmp, operands[1], operands[2])); ++ emit_insn (gen_rotrv4qi_3 (tmp, tmp)); ++ emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<opcode>v2hi3_bbtt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (vec_merge:V2HI ++ (vec_duplicate:V2HI ++ (sumin_max:HI (vec_select:HI ++ (match_operand:V2HI 1 "register_operand" " r") ++ (parallel [(const_int 1)])) ++ (vec_select:HI ++ (match_operand:V2HI 2 "register_operand" " r") ++ (parallel [(const_int 1)])))) ++ (vec_duplicate:V2HI ++ (sumin_max:HI (vec_select:HI ++ (match_dup:V2HI 1) ++ (parallel [(const_int 0)])) ++ (vec_select:HI ++ (match_dup:V2HI 2) ++ (parallel [(const_int 0)])))) ++ (const_int 2)))] ++ "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" ++ "#" ++ "NDS32_EXT_DSP_P ()" ++ [(const_int 0)] ++{ ++ emit_insn (gen_<opcode>v2hi3 (operands[0], operands[1], operands[2])); ++ DONE; ++} ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_expand "abs<mode>2" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv" ++{ ++}) ++ ++(define_insn "kabs<mode>2" ++ [(set (match_operand:VQIHI 0 "register_operand" "=r") ++ (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] ++ "NDS32_EXT_DSP_P ()" ++ "kabs<bits>\t%0, %1" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (mult:DI ++ (extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (extend:DI ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (extend:DI ++ (mult:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>mar64_4" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI ++ (extend:DI ++ (mult:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>mar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>msr64" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>msr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "<su>msr64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (extend:DI ++ (mult:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "<su>msr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++;; kmar64, kmsr64, ukmar64 and ukmsr64 ++(define_insn "kmar64_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ss_plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (sign_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmar64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ss_plus:DI ++ (mult:DI ++ (sign_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "kmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "kmsr64" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ss_minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (sign_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (sign_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "kmsr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "ukmar64_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (us_plus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (zero_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (zero_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "ukmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "ukmar64_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (us_plus:DI ++ (mult:DI ++ (zero_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (zero_extend:DI ++ (match_operand:SI 3 "register_operand" " r"))) ++ (match_operand:DI 1 "register_operand" " 0")))] ++ "NDS32_EXT_DSP_P ()" ++ "ukmar64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "ukmsr64" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (us_minus:DI ++ (match_operand:DI 1 "register_operand" " 0") ++ (mult:DI ++ (zero_extend:DI ++ (match_operand:SI 2 "register_operand" " r")) ++ (zero_extend:DI ++ (match_operand:SI 3 "register_operand" " r")))))] ++ "NDS32_EXT_DSP_P ()" ++ "ukmsr64\t%0, %2, %3" ++ [(set_attr "type" "dmac") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick1" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 3 "register_operand" " r")) ++ (and:SI ++ (match_operand:SI 2 "register_operand" " r") ++ (not:SI (match_dup 3)))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %1, %2, %3" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (not:SI (match_dup 2)) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %1, %3, %2" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (match_operand:SI 3 "register_operand" " r") ++ (not:SI (match_dup 1)))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %2, %3, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick4" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (not:SI (match_dup 1)) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %2, %3, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick5" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (not:SI (match_operand:SI 2 "register_operand" " r"))) ++ (and:SI ++ (match_operand:SI 3 "register_operand" " r") ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %1, %2" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick6" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (not:SI (match_operand:SI 1 "register_operand" " r")) ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (match_operand:SI 3 "register_operand" " r") ++ (match_dup 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %2, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick7" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (match_operand:SI 1 "register_operand" " r") ++ (not:SI (match_operand:SI 2 "register_operand" " r"))) ++ (and:SI ++ (match_dup 2) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %1, %2" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "bpick8" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ior:SI ++ (and:SI ++ (not:SI (match_operand:SI 1 "register_operand" " r")) ++ (match_operand:SI 2 "register_operand" " r")) ++ (and:SI ++ (match_dup 1) ++ (match_operand:SI 3 "register_operand" " r"))))] ++ "NDS32_EXT_DSP_P ()" ++ "bpick\t%0, %3, %2, %1" ++ [(set_attr "type" "dbpick") ++ (set_attr "length" "4")]) ++ ++(define_insn "sraiu" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))] ++ UNSPEC_ROUND))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ srai.u\t%0, %1, %2 ++ sra.u\t%0, %1, %2" ++ [(set_attr "type" "daluround") ++ (set_attr "length" "4")]) ++ ++(define_insn "kssl" ++ [(set (match_operand:SI 0 "register_operand" "= r, r") ++ (ss_ashift:SI (match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ kslli\t%0, %1, %2 ++ ksll\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++(define_insn "kslraw_round" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (if_then_else:SI ++ (lt:SI (match_operand:SI 2 "register_operand" " r") ++ (const_int 0)) ++ (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r") ++ (neg:SI (match_dup 2)))] ++ UNSPEC_ROUND) ++ (ss_ashift:SI (match_dup 1) ++ (match_dup 2))))] ++ "NDS32_EXT_DSP_P ()" ++ "kslraw.u\t%0, %1, %2" ++ [(set_attr "type" "daluround") ++ (set_attr "length" "4")]) ++ ++(define_insn_and_split "<shift>di3" ++ [(set (match_operand:DI 0 "register_operand" "") ++ (shift_rotate:DI (match_operand:DI 1 "register_operand" "") ++ (match_operand:SI 2 "nds32_rimm6u_operand" "")))] ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ "#" ++ "NDS32_EXT_DSP_P () && !reload_completed" ++ [(const_int 0)] ++{ ++ if (REGNO (operands[0]) == REGNO (operands[1])) ++ { ++ rtx tmp = gen_reg_rtx (DImode); ++ nds32_split_<code>di3 (tmp, operands[1], operands[2]); ++ emit_move_insn (operands[0], tmp); ++ } ++ else ++ nds32_split_<code>di3 (operands[0], operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_insn "sclip32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))] ++ "NDS32_EXT_DSP_P ()" ++ "sclip32\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "uclip32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))] ++ "NDS32_EXT_DSP_P ()" ++ "uclip32\t%0, %1, %2" ++ [(set_attr "type" "dclip") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "bitrev" ++ [(set (match_operand:SI 0 "register_operand" "=r, r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")] ++ UNSPEC_BITREV))] ++ "" ++ "@ ++ bitrev\t%0, %1, %2 ++ bitrevi\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")] ++) ++ ++;; wext, wexti ++(define_insn "<su>wext" ++ [(set (match_operand:SI 0 "register_operand" "=r, r") ++ (truncate:SI ++ (shiftrt:DI ++ (match_operand:DI 1 "register_operand" " r, r") ++ (match_operand:SI 2 "nds32_rimm5u_operand" " r,Iu05"))))] ++ "NDS32_EXT_DSP_P ()" ++ "@ ++ wext\t%0, %1, %2 ++ wexti\t%0, %1, %2" ++ [(set_attr "type" "dwext") ++ (set_attr "length" "4")]) ++ ++;; 32-bit add/sub instruction: raddw and rsubw. ++(define_insn "r<opcode>si3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (ashiftrt:DI ++ (plus_minus:DI ++ (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "r<opcode>w\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) ++ ++;; 32-bit add/sub instruction: uraddw and ursubw. ++(define_insn "ur<opcode>si3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (truncate:SI ++ (lshiftrt:DI ++ (plus_minus:DI ++ (zero_extend:DI (match_operand:SI 1 "register_operand" " r")) ++ (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))) ++ (const_int 1))))] ++ "NDS32_EXT_DSP_P ()" ++ "ur<opcode>w\t%0, %1, %2" ++ [(set_attr "type" "dalu") ++ (set_attr "length" "4")]) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-elf.opt gcc-8.2.0/gcc/config/nds32/nds32-elf.opt +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-elf.opt 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-elf.opt 2019-01-25 15:38:32.825242648 +0100 +@@ -0,0 +1,16 @@ ++mcmodel= ++Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM) ++Specify the address generation strategy for code model. ++ ++Enum ++Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) ++Known cmodel types (for use with the -mcmodel= option): ++ ++EnumValue ++Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-fp-as-gp.c gcc-8.2.0/gcc/config/nds32/nds32-fp-as-gp.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-fp-as-gp.c 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-fp-as-gp.c 2019-01-25 15:38:32.825242648 +0100 +@@ -26,19 +26,256 @@ + #include "system.h" + #include "coretypes.h" + #include "backend.h" ++#include "hard-reg-set.h" ++#include "tm_p.h" ++#include "rtl.h" ++#include "memmodel.h" ++#include "emit-rtl.h" ++#include "insn-config.h" ++#include "regs.h" ++#include "hard-reg-set.h" ++#include "ira.h" ++#include "ira-int.h" ++#include "df.h" ++#include "tree-core.h" ++#include "tree-pass.h" ++#include "nds32-protos.h" + + /* ------------------------------------------------------------------------ */ + ++/* A helper function to check if this function should contain prologue. */ ++static bool ++nds32_have_prologue_p (void) ++{ ++ int i; ++ ++ for (i = 0; i < 28; i++) ++ if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) ++ return true; ++ ++ return (flag_pic ++ || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) ++ || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); ++} ++ ++static int ++nds32_get_symbol_count (void) ++{ ++ int symbol_count = 0; ++ rtx_insn *insn; ++ basic_block bb; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ /* Counting the insn number which the addressing mode is symbol. */ ++ if (single_set (insn) && nds32_symbol_load_store_p (insn)) ++ { ++ rtx pattern = PATTERN (insn); ++ rtx mem; ++ gcc_assert (GET_CODE (pattern) == SET); ++ if (GET_CODE (SET_SRC (pattern)) == REG ) ++ mem = SET_DEST (pattern); ++ else ++ mem = SET_SRC (pattern); ++ ++ /* We have only lwi37 and swi37 for fp-as-gp optimization, ++ so don't count any other than SImode. ++ MEM for QImode and HImode will wrap by ZERO_EXTEND ++ or SIGN_EXTEND */ ++ if (GET_CODE (mem) == MEM) ++ symbol_count++; ++ } ++ } ++ } ++ ++ return symbol_count; ++} ++ + /* Function to determine whether it is worth to do fp_as_gp optimization. +- Return 0: It is NOT worth to do fp_as_gp optimization. +- Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. ++ Return false: It is NOT worth to do fp_as_gp optimization. ++ Return true: It is APPROXIMATELY worth to do fp_as_gp optimization. + Note that if it is worth to do fp_as_gp optimization, + we MUST set FP_REGNUM ever live in this function. */ +-int ++static bool + nds32_fp_as_gp_check_available (void) + { +- /* By default we return 0. */ +- return 0; ++ basic_block bb; ++ basic_block exit_bb; ++ edge_iterator ei; ++ edge e; ++ bool first_exit_blocks_p; ++ ++ /* If there exists ANY of following conditions, ++ we DO NOT perform fp_as_gp optimization: ++ 1. TARGET_FORBID_FP_AS_GP is set ++ regardless of the TARGET_FORCE_FP_AS_GP. ++ 2. User explicitly uses 'naked'/'no_prologue' attribute. ++ We use nds32_naked_function_p() to help such checking. ++ 3. Not optimize for size. ++ 4. Need frame pointer. ++ 5. If $fp is already required to be saved, ++ it means $fp is already choosen by register allocator. ++ Thus we better not to use it for fp_as_gp optimization. ++ 6. This function is a vararg function. ++ DO NOT apply fp_as_gp optimization on this function ++ because it may change and break stack frame. ++ 7. The epilogue is empty. ++ This happens when the function uses exit() ++ or its attribute is no_return. ++ In that case, compiler will not expand epilogue ++ so that we have no chance to output .omit_fp_end directive. */ ++ if (TARGET_FORBID_FP_AS_GP ++ || nds32_naked_function_p (current_function_decl) ++ || !optimize_size ++ || frame_pointer_needed ++ || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) ++ || (cfun->stdarg == 1) ++ || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) ++ return false; ++ ++ /* Disable fp_as_gp if there is any infinite loop since the fp may ++ reuse in infinite loops by register rename. ++ For check infinite loops we should make sure exit_bb is post dominate ++ all other basic blocks if there is no infinite loops. */ ++ first_exit_blocks_p = true; ++ exit_bb = NULL; ++ ++ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) ++ { ++ /* More than one exit block also do not perform fp_as_gp optimization. */ ++ if (!first_exit_blocks_p) ++ return false; ++ ++ exit_bb = e->src; ++ first_exit_blocks_p = false; ++ } ++ ++ /* Not found exit_bb? just abort fp_as_gp! */ ++ if (!exit_bb) ++ return false; ++ ++ /* Each bb should post dominate by exit_bb if there is no infinite loop! */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ if (!dominated_by_p (CDI_POST_DOMINATORS, ++ bb, ++ exit_bb)) ++ return false; ++ } ++ ++ /* Now we can check the possibility of using fp_as_gp optimization. */ ++ if (TARGET_FORCE_FP_AS_GP) ++ { ++ /* User explicitly issues -mforce-fp-as-gp option. */ ++ return true; ++ } ++ else ++ { ++ /* In the following we are going to evaluate whether ++ it is worth to do fp_as_gp optimization. */ ++ bool good_gain = false; ++ int symbol_count; ++ ++ int threshold; ++ ++ /* We check if there already requires prologue. ++ Note that $gp will be saved in prologue for PIC code generation. ++ After that, we can set threshold by the existence of prologue. ++ Each fp-implied instruction will gain 2-byte code size ++ from gp-aware instruction, so we have following heuristics. */ ++ if (flag_pic ++ || nds32_have_prologue_p ()) ++ { ++ /* Have-prologue: ++ Compiler already intends to generate prologue content, ++ so the fp_as_gp optimization will only insert ++ 'la $fp,_FP_BASE_' instruction, which will be ++ converted into 4-byte instruction at link time. ++ The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ ++ threshold = 3; ++ } ++ else ++ { ++ /* None-prologue: ++ Compiler originally does not generate prologue content, ++ so the fp_as_gp optimization will NOT ONLY insert ++ 'la $fp,_FP_BASE' instruction, but also causes ++ push/pop instructions. ++ If we are using v3push (push25/pop25), ++ the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; ++ If we are using normal push (smw/lmw), ++ the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ ++ threshold = 5 + (TARGET_V3PUSH ? 0 : 2); ++ } ++ ++ symbol_count = nds32_get_symbol_count (); ++ ++ if (symbol_count >= threshold) ++ good_gain = true; ++ ++ /* Enable fp_as_gp optimization when potential gain is good enough. */ ++ return good_gain; ++ } ++} ++ ++static unsigned int ++nds32_fp_as_gp (void) ++{ ++ bool fp_as_gp_p; ++ calculate_dominance_info (CDI_POST_DOMINATORS); ++ fp_as_gp_p = nds32_fp_as_gp_check_available (); ++ ++ /* Here is a hack to IRA for enable/disable a hard register per function. ++ We *MUST* review this way after migrate gcc 4.9! */ ++ if (fp_as_gp_p) { ++ SET_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); ++ df_set_regs_ever_live (FP_REGNUM, 1); ++ } else { ++ CLEAR_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); ++ } ++ ++ cfun->machine->fp_as_gp_p = fp_as_gp_p; ++ ++ free_dominance_info (CDI_POST_DOMINATORS); ++ return 1; ++} ++ ++const pass_data pass_data_nds32_fp_as_gp = ++{ ++ RTL_PASS, /* type */ ++ "fp_as_gp", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0 /* todo_flags_finish */ ++}; ++ ++class pass_nds32_fp_as_gp : public rtl_opt_pass ++{ ++public: ++ pass_nds32_fp_as_gp (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ bool gate (function *) ++ { ++ return !TARGET_LINUX_ABI ++ && TARGET_16_BIT ++ && optimize_size; ++ } ++ unsigned int execute (function *) { return nds32_fp_as_gp (); } ++}; ++ ++rtl_opt_pass * ++make_pass_nds32_fp_as_gp (gcc::context *ctxt) ++{ ++ return new pass_nds32_fp_as_gp (ctxt); + } + + /* ------------------------------------------------------------------------ */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-fpu.md gcc-8.2.0/gcc/config/nds32/nds32-fpu.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-fpu.md 2018-04-06 07:51:33.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-fpu.md 2019-01-25 15:38:32.825242648 +0100 +@@ -1,5 +1,5 @@ + ;; Machine description of Andes NDS32 cpu for GNU compiler +-;; Copyright (C) 2012-2015 Free Software Foundation, Inc. ++;; Copyright (C) 2012-2018 Free Software Foundation, Inc. + ;; Contributed by Andes Technology Corporation. + ;; + ;; This file is part of GCC. +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-graywolf.md gcc-8.2.0/gcc/config/nds32/nds32-graywolf.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-graywolf.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-graywolf.md 2019-01-25 15:38:32.825242648 +0100 +@@ -0,0 +1,471 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2013 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++;; ------------------------------------------------------------------------ ++;; Define Graywolf pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_graywolf_machine") ++ ++(define_cpu_unit "gw_ii_0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_ii_1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_ex_p0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_mm_p0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_wb_p0" "nds32_graywolf_machine") ++(define_cpu_unit "gw_ex_p1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_mm_p1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_wb_p1" "nds32_graywolf_machine") ++(define_cpu_unit "gw_iq_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_rf_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e1_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e2_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e3_p2" "nds32_graywolf_machine") ++(define_cpu_unit "gw_e4_p2" "nds32_graywolf_machine") ++ ++(define_reservation "gw_ii" "gw_ii_0 | gw_ii_1") ++(define_reservation "gw_ex" "gw_ex_p0 | gw_ex_p1") ++(define_reservation "gw_mm" "gw_mm_p0 | gw_mm_p1") ++(define_reservation "gw_wb" "gw_wb_p0 | gw_wb_p1") ++ ++(define_reservation "gw_ii_all" "gw_ii_0 + gw_ii_1") ++ ++(define_insn_reservation "nds_gw_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_alu" 1 ++ (and (and (eq_attr "type" "alu") ++ (match_test "!nds32::movd44_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_movd44" 1 ++ (and (and (eq_attr "type" "alu") ++ (match_test "nds32::movd44_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex*2, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex*3, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_pbsada" 1 ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex*3, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_load" 1 ++ (and (and (eq_attr "type" "load") ++ (match_test "!nds32::post_update_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_2w" 1 ++ (and (and (eq_attr "type" "load") ++ (match_test "nds32::post_update_insn_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store" 1 ++ (and (and (eq_attr "type" "store") ++ (match_test "!nds32::store_offset_reg_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_3r" 1 ++ (and (and (eq_attr "type" "store") ++ (match_test "nds32::store_offset_reg_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_2" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_2" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_3" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_5" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") ++ ++(define_insn_reservation "nds_gw_mul_fast1" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mul_fast2" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_0, gw_ex_p0*2, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mul_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mac_fast1" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mac_fast2" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_all, gw_ex_p0*2, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_mac_slow" 1 ++ (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "graywolf"))) ++ "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_div" 1 ++ (and (and (eq_attr "type" "div") ++ (match_test "!nds32::divmod_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_div_2w" 1 ++ (and (and (eq_attr "type" "div") ++ (match_test "nds32::divmod_p (insn)")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_alu" 1 ++ (and (eq_attr "type" "dalu") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ex, gw_mm, gw_wb") ++ ++(define_insn_reservation "nds_gw_dsp_alu64" 1 ++ (and (eq_attr "type" "dalu64") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_alu_round" 1 ++ (and (eq_attr "type" "daluround") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_cmp" 1 ++ (and (eq_attr "type" "dcmp") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_clip" 1 ++ (and (eq_attr "type" "dclip") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_mul" 1 ++ (and (eq_attr "type" "dmul") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_mac" 1 ++ (and (eq_attr "type" "dmac") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_insb" 1 ++ (and (eq_attr "type" "dinsb") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_pack" 1 ++ (and (eq_attr "type" "dpack") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_bpick" 1 ++ (and (eq_attr "type" "dbpick") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_dsp_wext" 1 ++ (and (eq_attr "type" "dwext") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") ++ ++(define_insn_reservation "nds_gw_fpu_alu" 4 ++ (and (eq_attr "type" "falu") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_muls" 4 ++ (and (eq_attr "type" "fmuls") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_muld" 4 ++ (and (eq_attr "type" "fmuld") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_macs" 4 ++ (and (eq_attr "type" "fmacs") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*3, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_macd" 4 ++ (and (eq_attr "type" "fmacd") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*4, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_divs" 4 ++ (and (ior (eq_attr "type" "fdivs") ++ (eq_attr "type" "fsqrts")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*14, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_divd" 4 ++ (and (ior (eq_attr "type" "fdivd") ++ (eq_attr "type" "fsqrtd")) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*28, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fast_alu" 2 ++ (and (ior (eq_attr "type" "fcmp") ++ (ior (eq_attr "type" "fabs") ++ (ior (eq_attr "type" "fcpy") ++ (eq_attr "type" "fcmov")))) ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmtsr" 1 ++ (and (eq_attr "type" "fmtsr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmtdr" 1 ++ (and (eq_attr "type" "fmtdr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmfsr" 1 ++ (and (eq_attr "type" "fmfsr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_fmfdr" 1 ++ (and (eq_attr "type" "fmfdr") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_load" 3 ++ (and (eq_attr "type" "fload") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++(define_insn_reservation "nds_gw_fpu_store" 1 ++ (and (eq_attr "type" "fstore") ++ (eq_attr "pipeline_model" "graywolf")) ++ "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") ++ ++;; FPU_ADDR_OUT -> FPU_ADDR_IN ++;; Main pipeline rules don't need this because those default latency is 1. ++(define_bypass 1 ++ "nds_gw_fpu_load, nds_gw_fpu_store" ++ "nds_gw_fpu_load, nds_gw_fpu_store" ++ "nds32_gw_ex_to_ex_p" ++) ++ ++;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_gw_load, nds_gw_load_2w,\ ++ nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ ++ nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ ++ nds_gw_div, nds_gw_div_2w,\ ++ nds_gw_dsp_alu64, nds_gw_dsp_mul, nds_gw_dsp_mac,\ ++ nds_gw_dsp_alu_round, nds_gw_dsp_bpick, nds_gw_dsp_wext" ++ "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\ ++ nds_gw_pbsad, nds_gw_pbsada,\ ++ nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ ++ nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ ++ nds_gw_branch,\ ++ nds_gw_div, nds_gw_div_2w,\ ++ nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\ ++ nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ ++ nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ ++ nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\ ++ nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\ ++ nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\ ++ nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\ ++ nds_gw_mmu,\ ++ nds_gw_dsp_alu, nds_gw_dsp_alu_round,\ ++ nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\ ++ nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\ ++ nds_gw_dsp_wext, nds_gw_dsp_bpick" ++ "nds32_gw_mm_to_ex_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ ++ nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ ++ nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12" ++ "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\ ++ nds_gw_pbsad, nds_gw_pbsada,\ ++ nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ ++ nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ ++ nds_gw_branch,\ ++ nds_gw_div, nds_gw_div_2w,\ ++ nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\ ++ nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ ++ nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ ++ nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\ ++ nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\ ++ nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\ ++ nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\ ++ nds_gw_mmu,\ ++ nds_gw_dsp_alu, nds_gw_dsp_alu_round,\ ++ nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\ ++ nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\ ++ nds_gw_dsp_wext, nds_gw_dsp_bpick" ++ "nds32_gw_last_load_to_ex_p" ++) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32.h gcc-8.2.0/gcc/config/nds32/nds32.h +--- gcc-8.2.0.orig/gcc/config/nds32/nds32.h 2018-05-07 03:27:52.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32.h 2019-01-25 15:44:03.534160189 +0100 +@@ -36,6 +36,16 @@ + #define NDS32_SYMBOL_REF_RODATA_P(x) \ + ((SYMBOL_REF_FLAGS (x) & NDS32_SYMBOL_FLAG_RODATA) != 0) + ++enum nds32_relax_insn_type ++{ ++ RELAX_ORI, ++ RELAX_PLT_ADD, ++ RELAX_TLS_ADD_or_LW, ++ RELAX_TLS_ADD_LW, ++ RELAX_TLS_LW_JRAL, ++ RELAX_DONE ++}; ++ + /* Classifies expand result for expand helper function. */ + enum nds32_expand_result_type + { +@@ -140,6 +150,9 @@ + Check gcc/common/config/nds32/nds32-common.c for the optimizations that + apply -malways-align. */ + #define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN) ++ ++#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP) ++ + /* Get alignment according to mode or type information. + When 'type' is nonnull, there is no need to look at 'mode'. */ + #define NDS32_MODE_TYPE_ALIGN(mode, type) \ +@@ -305,6 +318,10 @@ + 2. The rtl lowering and optimization are close to target code. + For this case we need address to be strictly aligned. */ + int strict_aligned_p; ++ ++ /* Record two similar attributes status. */ ++ int attr_naked_p; ++ int attr_no_prologue_p; + }; + + /* A C structure that contains the arguments information. */ +@@ -350,7 +367,8 @@ + { + NDS32_NESTED, + NDS32_NOT_NESTED, +- NDS32_NESTED_READY ++ NDS32_NESTED_READY, ++ NDS32_CRITICAL + }; + + /* Define structure to record isr information. +@@ -378,6 +396,13 @@ + unless user specifies attribute to change it. */ + enum nds32_isr_nested_type nested_type; + ++ /* Secure isr level. ++ Currently we have 0-3 security level. ++ It should be set to 0 by default. ++ For security processors, this is determined by secure ++ attribute or compiler options. */ ++ unsigned int security_level; ++ + /* Total vectors. + The total vectors = interrupt + exception numbers + reset. + It should be set to 0 by default. +@@ -439,7 +464,30 @@ + NDS32_BUILTIN_FFB, + NDS32_BUILTIN_FFMISM, + NDS32_BUILTIN_FLMISM, +- ++ NDS32_BUILTIN_KADDW, ++ NDS32_BUILTIN_KSUBW, ++ NDS32_BUILTIN_KADDH, ++ NDS32_BUILTIN_KSUBH, ++ NDS32_BUILTIN_KDMBB, ++ NDS32_BUILTIN_V_KDMBB, ++ NDS32_BUILTIN_KDMBT, ++ NDS32_BUILTIN_V_KDMBT, ++ NDS32_BUILTIN_KDMTB, ++ NDS32_BUILTIN_V_KDMTB, ++ NDS32_BUILTIN_KDMTT, ++ NDS32_BUILTIN_V_KDMTT, ++ NDS32_BUILTIN_KHMBB, ++ NDS32_BUILTIN_V_KHMBB, ++ NDS32_BUILTIN_KHMBT, ++ NDS32_BUILTIN_V_KHMBT, ++ NDS32_BUILTIN_KHMTB, ++ NDS32_BUILTIN_V_KHMTB, ++ NDS32_BUILTIN_KHMTT, ++ NDS32_BUILTIN_V_KHMTT, ++ NDS32_BUILTIN_KSLRAW, ++ NDS32_BUILTIN_KSLRAW_U, ++ NDS32_BUILTIN_RDOV, ++ NDS32_BUILTIN_CLROV, + NDS32_BUILTIN_ROTR, + NDS32_BUILTIN_SVA, + NDS32_BUILTIN_SVS, +@@ -512,7 +560,295 @@ + NDS32_BUILTIN_SET_TRIG_LEVEL, + NDS32_BUILTIN_SET_TRIG_EDGE, + NDS32_BUILTIN_GET_TRIG_TYPE, +- ++ NDS32_BUILTIN_DSP_BEGIN, ++ NDS32_BUILTIN_ADD16, ++ NDS32_BUILTIN_V_UADD16, ++ NDS32_BUILTIN_V_SADD16, ++ NDS32_BUILTIN_RADD16, ++ NDS32_BUILTIN_V_RADD16, ++ NDS32_BUILTIN_URADD16, ++ NDS32_BUILTIN_V_URADD16, ++ NDS32_BUILTIN_KADD16, ++ NDS32_BUILTIN_V_KADD16, ++ NDS32_BUILTIN_UKADD16, ++ NDS32_BUILTIN_V_UKADD16, ++ NDS32_BUILTIN_SUB16, ++ NDS32_BUILTIN_V_USUB16, ++ NDS32_BUILTIN_V_SSUB16, ++ NDS32_BUILTIN_RSUB16, ++ NDS32_BUILTIN_V_RSUB16, ++ NDS32_BUILTIN_URSUB16, ++ NDS32_BUILTIN_V_URSUB16, ++ NDS32_BUILTIN_KSUB16, ++ NDS32_BUILTIN_V_KSUB16, ++ NDS32_BUILTIN_UKSUB16, ++ NDS32_BUILTIN_V_UKSUB16, ++ NDS32_BUILTIN_CRAS16, ++ NDS32_BUILTIN_V_UCRAS16, ++ NDS32_BUILTIN_V_SCRAS16, ++ NDS32_BUILTIN_RCRAS16, ++ NDS32_BUILTIN_V_RCRAS16, ++ NDS32_BUILTIN_URCRAS16, ++ NDS32_BUILTIN_V_URCRAS16, ++ NDS32_BUILTIN_KCRAS16, ++ NDS32_BUILTIN_V_KCRAS16, ++ NDS32_BUILTIN_UKCRAS16, ++ NDS32_BUILTIN_V_UKCRAS16, ++ NDS32_BUILTIN_CRSA16, ++ NDS32_BUILTIN_V_UCRSA16, ++ NDS32_BUILTIN_V_SCRSA16, ++ NDS32_BUILTIN_RCRSA16, ++ NDS32_BUILTIN_V_RCRSA16, ++ NDS32_BUILTIN_URCRSA16, ++ NDS32_BUILTIN_V_URCRSA16, ++ NDS32_BUILTIN_KCRSA16, ++ NDS32_BUILTIN_V_KCRSA16, ++ NDS32_BUILTIN_UKCRSA16, ++ NDS32_BUILTIN_V_UKCRSA16, ++ NDS32_BUILTIN_ADD8, ++ NDS32_BUILTIN_V_UADD8, ++ NDS32_BUILTIN_V_SADD8, ++ NDS32_BUILTIN_RADD8, ++ NDS32_BUILTIN_V_RADD8, ++ NDS32_BUILTIN_URADD8, ++ NDS32_BUILTIN_V_URADD8, ++ NDS32_BUILTIN_KADD8, ++ NDS32_BUILTIN_V_KADD8, ++ NDS32_BUILTIN_UKADD8, ++ NDS32_BUILTIN_V_UKADD8, ++ NDS32_BUILTIN_SUB8, ++ NDS32_BUILTIN_V_USUB8, ++ NDS32_BUILTIN_V_SSUB8, ++ NDS32_BUILTIN_RSUB8, ++ NDS32_BUILTIN_V_RSUB8, ++ NDS32_BUILTIN_URSUB8, ++ NDS32_BUILTIN_V_URSUB8, ++ NDS32_BUILTIN_KSUB8, ++ NDS32_BUILTIN_V_KSUB8, ++ NDS32_BUILTIN_UKSUB8, ++ NDS32_BUILTIN_V_UKSUB8, ++ NDS32_BUILTIN_SRA16, ++ NDS32_BUILTIN_V_SRA16, ++ NDS32_BUILTIN_SRA16_U, ++ NDS32_BUILTIN_V_SRA16_U, ++ NDS32_BUILTIN_SRL16, ++ NDS32_BUILTIN_V_SRL16, ++ NDS32_BUILTIN_SRL16_U, ++ NDS32_BUILTIN_V_SRL16_U, ++ NDS32_BUILTIN_SLL16, ++ NDS32_BUILTIN_V_SLL16, ++ NDS32_BUILTIN_KSLL16, ++ NDS32_BUILTIN_V_KSLL16, ++ NDS32_BUILTIN_KSLRA16, ++ NDS32_BUILTIN_V_KSLRA16, ++ NDS32_BUILTIN_KSLRA16_U, ++ NDS32_BUILTIN_V_KSLRA16_U, ++ NDS32_BUILTIN_CMPEQ16, ++ NDS32_BUILTIN_V_SCMPEQ16, ++ NDS32_BUILTIN_V_UCMPEQ16, ++ NDS32_BUILTIN_SCMPLT16, ++ NDS32_BUILTIN_V_SCMPLT16, ++ NDS32_BUILTIN_SCMPLE16, ++ NDS32_BUILTIN_V_SCMPLE16, ++ NDS32_BUILTIN_UCMPLT16, ++ NDS32_BUILTIN_V_UCMPLT16, ++ NDS32_BUILTIN_UCMPLE16, ++ NDS32_BUILTIN_V_UCMPLE16, ++ NDS32_BUILTIN_CMPEQ8, ++ NDS32_BUILTIN_V_SCMPEQ8, ++ NDS32_BUILTIN_V_UCMPEQ8, ++ NDS32_BUILTIN_SCMPLT8, ++ NDS32_BUILTIN_V_SCMPLT8, ++ NDS32_BUILTIN_SCMPLE8, ++ NDS32_BUILTIN_V_SCMPLE8, ++ NDS32_BUILTIN_UCMPLT8, ++ NDS32_BUILTIN_V_UCMPLT8, ++ NDS32_BUILTIN_UCMPLE8, ++ NDS32_BUILTIN_V_UCMPLE8, ++ NDS32_BUILTIN_SMIN16, ++ NDS32_BUILTIN_V_SMIN16, ++ NDS32_BUILTIN_UMIN16, ++ NDS32_BUILTIN_V_UMIN16, ++ NDS32_BUILTIN_SMAX16, ++ NDS32_BUILTIN_V_SMAX16, ++ NDS32_BUILTIN_UMAX16, ++ NDS32_BUILTIN_V_UMAX16, ++ NDS32_BUILTIN_SCLIP16, ++ NDS32_BUILTIN_V_SCLIP16, ++ NDS32_BUILTIN_UCLIP16, ++ NDS32_BUILTIN_V_UCLIP16, ++ NDS32_BUILTIN_KHM16, ++ NDS32_BUILTIN_V_KHM16, ++ NDS32_BUILTIN_KHMX16, ++ NDS32_BUILTIN_V_KHMX16, ++ NDS32_BUILTIN_KABS16, ++ NDS32_BUILTIN_V_KABS16, ++ NDS32_BUILTIN_SMIN8, ++ NDS32_BUILTIN_V_SMIN8, ++ NDS32_BUILTIN_UMIN8, ++ NDS32_BUILTIN_V_UMIN8, ++ NDS32_BUILTIN_SMAX8, ++ NDS32_BUILTIN_V_SMAX8, ++ NDS32_BUILTIN_UMAX8, ++ NDS32_BUILTIN_V_UMAX8, ++ NDS32_BUILTIN_KABS8, ++ NDS32_BUILTIN_V_KABS8, ++ NDS32_BUILTIN_SUNPKD810, ++ NDS32_BUILTIN_V_SUNPKD810, ++ NDS32_BUILTIN_SUNPKD820, ++ NDS32_BUILTIN_V_SUNPKD820, ++ NDS32_BUILTIN_SUNPKD830, ++ NDS32_BUILTIN_V_SUNPKD830, ++ NDS32_BUILTIN_SUNPKD831, ++ NDS32_BUILTIN_V_SUNPKD831, ++ NDS32_BUILTIN_ZUNPKD810, ++ NDS32_BUILTIN_V_ZUNPKD810, ++ NDS32_BUILTIN_ZUNPKD820, ++ NDS32_BUILTIN_V_ZUNPKD820, ++ NDS32_BUILTIN_ZUNPKD830, ++ NDS32_BUILTIN_V_ZUNPKD830, ++ NDS32_BUILTIN_ZUNPKD831, ++ NDS32_BUILTIN_V_ZUNPKD831, ++ NDS32_BUILTIN_RADDW, ++ NDS32_BUILTIN_URADDW, ++ NDS32_BUILTIN_RSUBW, ++ NDS32_BUILTIN_URSUBW, ++ NDS32_BUILTIN_SRA_U, ++ NDS32_BUILTIN_KSLL, ++ NDS32_BUILTIN_PKBB16, ++ NDS32_BUILTIN_V_PKBB16, ++ NDS32_BUILTIN_PKBT16, ++ NDS32_BUILTIN_V_PKBT16, ++ NDS32_BUILTIN_PKTB16, ++ NDS32_BUILTIN_V_PKTB16, ++ NDS32_BUILTIN_PKTT16, ++ NDS32_BUILTIN_V_PKTT16, ++ NDS32_BUILTIN_SMMUL, ++ NDS32_BUILTIN_SMMUL_U, ++ NDS32_BUILTIN_KMMAC, ++ NDS32_BUILTIN_KMMAC_U, ++ NDS32_BUILTIN_KMMSB, ++ NDS32_BUILTIN_KMMSB_U, ++ NDS32_BUILTIN_KWMMUL, ++ NDS32_BUILTIN_KWMMUL_U, ++ NDS32_BUILTIN_SMMWB, ++ NDS32_BUILTIN_V_SMMWB, ++ NDS32_BUILTIN_SMMWB_U, ++ NDS32_BUILTIN_V_SMMWB_U, ++ NDS32_BUILTIN_SMMWT, ++ NDS32_BUILTIN_V_SMMWT, ++ NDS32_BUILTIN_SMMWT_U, ++ NDS32_BUILTIN_V_SMMWT_U, ++ NDS32_BUILTIN_KMMAWB, ++ NDS32_BUILTIN_V_KMMAWB, ++ NDS32_BUILTIN_KMMAWB_U, ++ NDS32_BUILTIN_V_KMMAWB_U, ++ NDS32_BUILTIN_KMMAWT, ++ NDS32_BUILTIN_V_KMMAWT, ++ NDS32_BUILTIN_KMMAWT_U, ++ NDS32_BUILTIN_V_KMMAWT_U, ++ NDS32_BUILTIN_SMBB, ++ NDS32_BUILTIN_V_SMBB, ++ NDS32_BUILTIN_SMBT, ++ NDS32_BUILTIN_V_SMBT, ++ NDS32_BUILTIN_SMTT, ++ NDS32_BUILTIN_V_SMTT, ++ NDS32_BUILTIN_KMDA, ++ NDS32_BUILTIN_V_KMDA, ++ NDS32_BUILTIN_KMXDA, ++ NDS32_BUILTIN_V_KMXDA, ++ NDS32_BUILTIN_SMDS, ++ NDS32_BUILTIN_V_SMDS, ++ NDS32_BUILTIN_SMDRS, ++ NDS32_BUILTIN_V_SMDRS, ++ NDS32_BUILTIN_SMXDS, ++ NDS32_BUILTIN_V_SMXDS, ++ NDS32_BUILTIN_KMABB, ++ NDS32_BUILTIN_V_KMABB, ++ NDS32_BUILTIN_KMABT, ++ NDS32_BUILTIN_V_KMABT, ++ NDS32_BUILTIN_KMATT, ++ NDS32_BUILTIN_V_KMATT, ++ NDS32_BUILTIN_KMADA, ++ NDS32_BUILTIN_V_KMADA, ++ NDS32_BUILTIN_KMAXDA, ++ NDS32_BUILTIN_V_KMAXDA, ++ NDS32_BUILTIN_KMADS, ++ NDS32_BUILTIN_V_KMADS, ++ NDS32_BUILTIN_KMADRS, ++ NDS32_BUILTIN_V_KMADRS, ++ NDS32_BUILTIN_KMAXDS, ++ NDS32_BUILTIN_V_KMAXDS, ++ NDS32_BUILTIN_KMSDA, ++ NDS32_BUILTIN_V_KMSDA, ++ NDS32_BUILTIN_KMSXDA, ++ NDS32_BUILTIN_V_KMSXDA, ++ NDS32_BUILTIN_SMAL, ++ NDS32_BUILTIN_V_SMAL, ++ NDS32_BUILTIN_BITREV, ++ NDS32_BUILTIN_WEXT, ++ NDS32_BUILTIN_BPICK, ++ NDS32_BUILTIN_INSB, ++ NDS32_BUILTIN_SADD64, ++ NDS32_BUILTIN_UADD64, ++ NDS32_BUILTIN_RADD64, ++ NDS32_BUILTIN_URADD64, ++ NDS32_BUILTIN_KADD64, ++ NDS32_BUILTIN_UKADD64, ++ NDS32_BUILTIN_SSUB64, ++ NDS32_BUILTIN_USUB64, ++ NDS32_BUILTIN_RSUB64, ++ NDS32_BUILTIN_URSUB64, ++ NDS32_BUILTIN_KSUB64, ++ NDS32_BUILTIN_UKSUB64, ++ NDS32_BUILTIN_SMAR64, ++ NDS32_BUILTIN_SMSR64, ++ NDS32_BUILTIN_UMAR64, ++ NDS32_BUILTIN_UMSR64, ++ NDS32_BUILTIN_KMAR64, ++ NDS32_BUILTIN_KMSR64, ++ NDS32_BUILTIN_UKMAR64, ++ NDS32_BUILTIN_UKMSR64, ++ NDS32_BUILTIN_SMALBB, ++ NDS32_BUILTIN_V_SMALBB, ++ NDS32_BUILTIN_SMALBT, ++ NDS32_BUILTIN_V_SMALBT, ++ NDS32_BUILTIN_SMALTT, ++ NDS32_BUILTIN_V_SMALTT, ++ NDS32_BUILTIN_SMALDA, ++ NDS32_BUILTIN_V_SMALDA, ++ NDS32_BUILTIN_SMALXDA, ++ NDS32_BUILTIN_V_SMALXDA, ++ NDS32_BUILTIN_SMALDS, ++ NDS32_BUILTIN_V_SMALDS, ++ NDS32_BUILTIN_SMALDRS, ++ NDS32_BUILTIN_V_SMALDRS, ++ NDS32_BUILTIN_SMALXDS, ++ NDS32_BUILTIN_V_SMALXDS, ++ NDS32_BUILTIN_SMUL16, ++ NDS32_BUILTIN_V_SMUL16, ++ NDS32_BUILTIN_SMULX16, ++ NDS32_BUILTIN_V_SMULX16, ++ NDS32_BUILTIN_UMUL16, ++ NDS32_BUILTIN_V_UMUL16, ++ NDS32_BUILTIN_UMULX16, ++ NDS32_BUILTIN_V_UMULX16, ++ NDS32_BUILTIN_SMSLDA, ++ NDS32_BUILTIN_V_SMSLDA, ++ NDS32_BUILTIN_SMSLXDA, ++ NDS32_BUILTIN_V_SMSLXDA, ++ NDS32_BUILTIN_UCLIP32, ++ NDS32_BUILTIN_SCLIP32, ++ NDS32_BUILTIN_KABS, ++ NDS32_BUILTIN_UALOAD_U16, ++ NDS32_BUILTIN_UALOAD_S16, ++ NDS32_BUILTIN_UALOAD_U8, ++ NDS32_BUILTIN_UALOAD_S8, ++ NDS32_BUILTIN_UASTORE_U16, ++ NDS32_BUILTIN_UASTORE_S16, ++ NDS32_BUILTIN_UASTORE_U8, ++ NDS32_BUILTIN_UASTORE_S8, ++ NDS32_BUILTIN_DSP_END, + NDS32_BUILTIN_UNALIGNED_FEATURE, + NDS32_BUILTIN_ENABLE_UNALIGNED, + NDS32_BUILTIN_DISABLE_UNALIGNED, +@@ -521,16 +857,30 @@ + + /* ------------------------------------------------------------------------ */ + +-#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) ++#define TARGET_ISR_VECTOR_SIZE_4_BYTE \ ++ (nds32_isr_vector_size == 4) + ++#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) + #define TARGET_ISA_V3 \ + (nds32_arch_option == ARCH_V3 \ ++ || nds32_arch_option == ARCH_V3J \ + || nds32_arch_option == ARCH_V3F \ + || nds32_arch_option == ARCH_V3S) + #define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) + ++#define TARGET_PIPELINE_N7 \ ++ (nds32_cpu_option == CPU_N7) ++#define TARGET_PIPELINE_N8 \ ++ (nds32_cpu_option == CPU_N6 \ ++ || nds32_cpu_option == CPU_N8) + #define TARGET_PIPELINE_N9 \ + (nds32_cpu_option == CPU_N9) ++#define TARGET_PIPELINE_N10 \ ++ (nds32_cpu_option == CPU_N10) ++#define TARGET_PIPELINE_N13 \ ++ (nds32_cpu_option == CPU_N12 || nds32_cpu_option == CPU_N13) ++#define TARGET_PIPELINE_GRAYWOLF \ ++ (nds32_cpu_option == CPU_GRAYWOLF) + #define TARGET_PIPELINE_SIMPLE \ + (nds32_cpu_option == CPU_SIMPLE) + +@@ -541,6 +891,12 @@ + #define TARGET_CMODEL_LARGE \ + (nds32_cmodel_option == CMODEL_LARGE) + ++#define TARGET_ICT_MODEL_SMALL \ ++ (nds32_ict_model == ICT_MODEL_SMALL) ++ ++#define TARGET_ICT_MODEL_LARGE \ ++ (nds32_ict_model == ICT_MODEL_LARGE) ++ + /* When -mcmodel=small or -mcmodel=medium, + compiler may generate gp-base instruction directly. */ + #define TARGET_GP_DIRECT \ +@@ -576,6 +932,21 @@ + #endif + + #define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2 ++ ++/* ------------------------------------------------------------------------ */ ++ ++#ifdef TARGET_DEFAULT_RELAX ++# define NDS32_RELAX_SPEC " %{!mno-relax:--relax}" ++#else ++# define NDS32_RELAX_SPEC " %{mrelax:--relax}" ++#endif ++ ++#ifdef TARGET_DEFAULT_EXT_DSP ++# define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}" ++#else ++# define NDS32_EXT_DSP_SPEC "" ++#endif ++ + /* ------------------------------------------------------------------------ */ + + /* Controlling the Compilation Driver. */ +@@ -591,11 +962,15 @@ + {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } + + #define CC1_SPEC \ +- "" ++ NDS32_EXT_DSP_SPEC + + #define ASM_SPEC \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + " %{march=*:-march=%*}" \ ++ " %{mno-16-bit|mno-16bit:-mno-16bit-ext}" \ ++ " %{march=v3m:%{!mfull-regs:%{!mreduced-regs:-mreduced-regs}}}" \ ++ " %{mfull-regs:-mno-reduced-regs}" \ ++ " %{mreduced-regs:-mreduced-regs}" \ + " %{mabi=*:-mabi=v%*}" \ + " %{mconfig-fpu=*:-mfpu-freg=%*}" \ + " %{mext-fpu-mac:-mmac}" \ +@@ -603,35 +978,9 @@ + " %{mext-fpu-sp:-mfpu-sp-ext}" \ + " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \ + " %{mext-fpu-dp:-mfpu-dp-ext}" \ +- " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" +- +-/* If user issues -mrelax, we need to pass '--relax' to linker. */ +-#define LINK_SPEC \ +- " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ +- " %{mrelax:--relax}" +- +-#define LIB_SPEC \ +- " -lc -lgloss" +- +-/* The option -mno-ctor-dtor can disable constructor/destructor feature +- by applying different crt stuff. In the convention, crt0.o is the +- startup file without constructor/destructor; +- crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the +- startup files with constructor/destructor. +- Note that crt0.o, crt1.o, crti.o, and crtn.o are provided +- by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are +- currently provided by GCC for nds32 target. +- +- For nds32 target so far: +- If -mno-ctor-dtor, we are going to link +- "crt0.o [user objects]". +- If general cases, we are going to link +- "crt1.o crtbegin1.o [user objects] crtend1.o". */ +-#define STARTFILE_SPEC \ +- " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ +- " %{!mno-ctor-dtor:crtbegin1.o%s}" +-#define ENDFILE_SPEC \ +- " %{!mno-ctor-dtor:crtend1.o%s}" ++ " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" \ ++ " %{mext-dsp:-mdsp-ext}" \ ++ " %{O|O1|O2|O3|Ofast:-O1;:-Os}" + + /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we + configure gcc with --target=nds32be-* setting. +@@ -642,9 +991,11 @@ + # define NDS32_ENDIAN_DEFAULT "mlittle-endian" + #endif + +-/* Currently we only have elf toolchain, +- where -mcmodel=medium is always the default. */ +-#define NDS32_CMODEL_DEFAULT "mcmodel=medium" ++#if TARGET_ELF ++# define NDS32_CMODEL_DEFAULT "mcmodel=medium" ++#else ++# define NDS32_CMODEL_DEFAULT "mcmodel=large" ++#endif + + #define MULTILIB_DEFAULTS \ + { NDS32_ENDIAN_DEFAULT, NDS32_CMODEL_DEFAULT } +@@ -1139,6 +1490,11 @@ + + #define PIC_OFFSET_TABLE_REGNUM GP_REGNUM + ++#define SYMBOLIC_CONST_P(X) \ ++(GET_CODE (X) == SYMBOL_REF \ ++ || GET_CODE (X) == LABEL_REF \ ++ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) ++ + + /* Defining the Output Assembler Language. */ + +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32_init.inc gcc-8.2.0/gcc/config/nds32/nds32_init.inc +--- gcc-8.2.0.orig/gcc/config/nds32/nds32_init.inc 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32_init.inc 2019-01-25 15:38:32.833242671 +0100 +@@ -0,0 +1,43 @@ ++/* ++ * nds32_init.inc ++ * ++ * NDS32 architecture startup assembler header file ++ * ++ */ ++ ++.macro nds32_init ++ ++ ! Initialize GP for data access ++ la $gp, _SDA_BASE_ ++ ++#if defined(__NDS32_EXT_EX9__) ++ ! Check HW for EX9 ++ mfsr $r0, $MSC_CFG ++ li $r1, (1 << 24) ++ and $r2, $r0, $r1 ++ beqz $r2, 1f ++ ++ ! Initialize the table base of EX9 instruction ++ la $r0, _ITB_BASE_ ++ mtusr $r0, $ITB ++1: ++#endif ++ ++#if defined(__NDS32_EXT_FPU_DP__) || defined(__NDS32_EXT_FPU_SP__) ++ ! Enable FPU ++ mfsr $r0, $FUCOP_CTL ++ ori $r0, $r0, #0x1 ++ mtsr $r0, $FUCOP_CTL ++ dsb ++ ++ ! Enable denormalized flush-to-Zero mode ++ fmfcsr $r0 ++ ori $r0,$r0,#0x1000 ++ fmtcsr $r0 ++ dsb ++#endif ++ ++ ! Initialize default stack pointer ++ la $sp, _stack ++ ++.endm +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-intrinsic.c gcc-8.2.0/gcc/config/nds32/nds32-intrinsic.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-intrinsic.c 2018-04-22 09:46:39.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-intrinsic.c 2019-01-25 15:38:32.825242648 +0100 +@@ -519,6 +519,7 @@ + { + NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG) + NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR) ++ NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV) + NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP) + NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS) + NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int", +@@ -558,6 +559,31 @@ + NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF) + NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp, + "set_current_sp", SET_CURRENT_SP) ++ NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16) ++ NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16) ++ NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8) ++ NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8) ++ NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810) ++ NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810) ++ NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820) ++ NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820) ++ NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830) ++ NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830) ++ NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831) ++ NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831) ++ NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810) ++ NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810) ++ NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820) ++ NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820) ++ NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830) ++ NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830) ++ NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831) ++ NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831) ++ NDS32_BUILTIN(unspec_kabs, "kabs", KABS) ++ NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16) ++ NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16) ++ NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8) ++ NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8) + }; + + /* Intrinsics that take just one argument. and the argument is immediate. */ +@@ -593,6 +619,28 @@ + NDS32_BUILTIN(unspec_ffb, "ffb", FFB) + NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM) + NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM) ++ NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW) ++ NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH) ++ NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW) ++ NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH) ++ NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB) ++ NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB) ++ NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT) ++ NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT) ++ NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB) ++ NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB) ++ NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT) ++ NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT) ++ NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB) ++ NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB) ++ NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT) ++ NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT) ++ NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB) ++ NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB) ++ NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT) ++ NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT) ++ NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW) ++ NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U) + NDS32_BUILTIN(rotrsi3, "rotr", ROTR) + NDS32_BUILTIN(unspec_sva, "sva", SVA) + NDS32_BUILTIN(unspec_svs, "svs", SVS) +@@ -603,7 +651,202 @@ + NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW) + NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W) + NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW) +- ++ NDS32_BUILTIN(addv2hi3, "add16", ADD16) ++ NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16) ++ NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16) ++ NDS32_BUILTIN(raddv2hi3, "radd16", RADD16) ++ NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16) ++ NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16) ++ NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16) ++ NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16) ++ NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16) ++ NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16) ++ NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16) ++ NDS32_BUILTIN(subv2hi3, "sub16", SUB16) ++ NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16) ++ NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16) ++ NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16) ++ NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16) ++ NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16) ++ NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16) ++ NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16) ++ NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16) ++ NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16) ++ NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16) ++ NDS32_BUILTIN(cras16_1, "cras16", CRAS16) ++ NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16) ++ NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16) ++ NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16) ++ NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16) ++ NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16) ++ NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16) ++ NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16) ++ NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16) ++ NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16) ++ NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16) ++ NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16) ++ NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16) ++ NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16) ++ NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16) ++ NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16) ++ NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16) ++ NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16) ++ NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16) ++ NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", V_KCRSA16) ++ NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16) ++ NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16) ++ NDS32_BUILTIN(addv4qi3, "add8", ADD8) ++ NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8) ++ NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8) ++ NDS32_BUILTIN(raddv4qi3, "radd8", RADD8) ++ NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8) ++ NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8) ++ NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8) ++ NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8) ++ NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8) ++ NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8) ++ NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8) ++ NDS32_BUILTIN(subv4qi3, "sub8", SUB8) ++ NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8) ++ NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8) ++ NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8) ++ NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8) ++ NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8) ++ NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8) ++ NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8) ++ NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8) ++ NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8) ++ NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8) ++ NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16) ++ NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16) ++ NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U) ++ NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U) ++ NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16) ++ NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16) ++ NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) ++ NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) ++ NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16) ++ NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16) ++ NDS32_BUILTIN(kslli16, "ksll16", KSLL16) ++ NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) ++ NDS32_BUILTIN(kslra16, "kslra16", KSLRA16) ++ NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16) ++ NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U) ++ NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U) ++ NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16) ++ NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16) ++ NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16) ++ NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16) ++ NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16) ++ NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16) ++ NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16) ++ NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16) ++ NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16) ++ NDS32_BUILTIN(ucmplt16, "ucmple16", UCMPLE16) ++ NDS32_BUILTIN(ucmplt16, "v_ucmple16", V_UCMPLE16) ++ NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8) ++ NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8) ++ NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8) ++ NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8) ++ NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8) ++ NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8) ++ NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8) ++ NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8) ++ NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8) ++ NDS32_BUILTIN(ucmplt8, "ucmple8", UCMPLE8) ++ NDS32_BUILTIN(ucmplt8, "v_ucmple8", V_UCMPLE8) ++ NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16) ++ NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16) ++ NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16) ++ NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16) ++ NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16) ++ NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16) ++ NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16) ++ NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16) ++ NDS32_BUILTIN(khm16, "khm16", KHM16) ++ NDS32_BUILTIN(khm16, "v_khm16", V_KHM16) ++ NDS32_BUILTIN(khmx16, "khmx16", KHMX16) ++ NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16) ++ NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8) ++ NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8) ++ NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8) ++ NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8) ++ NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8) ++ NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8) ++ NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8) ++ NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8) ++ NDS32_BUILTIN(raddsi3, "raddw", RADDW) ++ NDS32_BUILTIN(uraddsi3, "uraddw", URADDW) ++ NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW) ++ NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW) ++ NDS32_BUILTIN(sraiu, "sra_u", SRA_U) ++ NDS32_BUILTIN(kssl, "ksll", KSLL) ++ NDS32_BUILTIN(pkbb, "pkbb16", PKBB16) ++ NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16) ++ NDS32_BUILTIN(pkbt, "pkbt16", PKBT16) ++ NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16) ++ NDS32_BUILTIN(pktb, "pktb16", PKTB16) ++ NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16) ++ NDS32_BUILTIN(pktt, "pktt16", PKTT16) ++ NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16) ++ NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL) ++ NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U) ++ NDS32_BUILTIN(smmwb, "smmwb", SMMWB) ++ NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB) ++ NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U) ++ NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U) ++ NDS32_BUILTIN(smmwt, "smmwt", SMMWT) ++ NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT) ++ NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U) ++ NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U) ++ NDS32_BUILTIN(smbb, "smbb", SMBB) ++ NDS32_BUILTIN(smbb, "v_smbb", V_SMBB) ++ NDS32_BUILTIN(smbt, "smbt", SMBT) ++ NDS32_BUILTIN(smbt, "v_smbt", V_SMBT) ++ NDS32_BUILTIN(smtt, "smtt", SMTT) ++ NDS32_BUILTIN(smtt, "v_smtt", V_SMTT) ++ NDS32_BUILTIN(kmda, "kmda", KMDA) ++ NDS32_BUILTIN(kmda, "v_kmda", V_KMDA) ++ NDS32_BUILTIN(kmxda, "kmxda", KMXDA) ++ NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA) ++ NDS32_BUILTIN(smds, "smds", SMDS) ++ NDS32_BUILTIN(smds, "v_smds", V_SMDS) ++ NDS32_BUILTIN(smdrs, "smdrs", SMDRS) ++ NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS) ++ NDS32_BUILTIN(smxdsv, "smxds", SMXDS) ++ NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS) ++ NDS32_BUILTIN(smal1, "smal", SMAL) ++ NDS32_BUILTIN(smal1, "v_smal", V_SMAL) ++ NDS32_BUILTIN(bitrev, "bitrev", BITREV) ++ NDS32_BUILTIN(wext, "wext", WEXT) ++ NDS32_BUILTIN(adddi3, "sadd64", SADD64) ++ NDS32_BUILTIN(adddi3, "uadd64", UADD64) ++ NDS32_BUILTIN(radddi3, "radd64", RADD64) ++ NDS32_BUILTIN(uradddi3, "uradd64", URADD64) ++ NDS32_BUILTIN(kadddi3, "kadd64", KADD64) ++ NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64) ++ NDS32_BUILTIN(subdi3, "ssub64", SSUB64) ++ NDS32_BUILTIN(subdi3, "usub64", USUB64) ++ NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64) ++ NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64) ++ NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64) ++ NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64) ++ NDS32_BUILTIN(smul16, "smul16", SMUL16) ++ NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16) ++ NDS32_BUILTIN(smulx16, "smulx16", SMULX16) ++ NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16) ++ NDS32_BUILTIN(umul16, "umul16", UMUL16) ++ NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16) ++ NDS32_BUILTIN(umulx16, "umulx16", UMULX16) ++ NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16) ++ NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL) ++ NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, ++ "put_unaligned_u16x2", UASTORE_U16) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, ++ "put_unaligned_s16x2", UASTORE_S16) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8) ++ NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8) + }; + + /* Two-argument intrinsics with an immediate second argument. */ +@@ -617,6 +860,22 @@ + NDS32_BUILTIN(unspec_clips, "clips", CLIPS) + NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ) + NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ) ++ NDS32_BUILTIN(ashrv2hi3, "srl16", SRL16) ++ NDS32_BUILTIN(ashrv2hi3, "v_srl16", V_SRL16) ++ NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) ++ NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) ++ NDS32_BUILTIN(kslli16, "ksll16", KSLL16) ++ NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) ++ NDS32_BUILTIN(sclip16, "sclip16", SCLIP16) ++ NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16) ++ NDS32_BUILTIN(uclip16, "uclip16", UCLIP16) ++ NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16) ++ NDS32_BUILTIN(sraiu, "sra_u", SRA_U) ++ NDS32_BUILTIN(kssl, "ksll", KSLL) ++ NDS32_BUILTIN(bitrev, "bitrev", BITREV) ++ NDS32_BUILTIN(wext, "wext", WEXT) ++ NDS32_BUILTIN(uclip32, "uclip32", UCLIP32) ++ NDS32_BUILTIN(sclip32, "sclip32", SCLIP32) + }; + + /* Intrinsics that take three arguments. */ +@@ -625,6 +884,67 @@ + NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA) + NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE) + NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP) ++ NDS32_BUILTIN(kmabb, "kmabb", KMABB) ++ NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB) ++ NDS32_BUILTIN(kmabt, "kmabt", KMABT) ++ NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT) ++ NDS32_BUILTIN(kmatt, "kmatt", KMATT) ++ NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT) ++ NDS32_BUILTIN(kmada, "kmada", KMADA) ++ NDS32_BUILTIN(kmada, "v_kmada", V_KMADA) ++ NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA) ++ NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA) ++ NDS32_BUILTIN(kmads, "kmads", KMADS) ++ NDS32_BUILTIN(kmads, "v_kmads", V_KMADS) ++ NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS) ++ NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS) ++ NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS) ++ NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS) ++ NDS32_BUILTIN(kmsda, "kmsda", KMSDA) ++ NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA) ++ NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA) ++ NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA) ++ NDS32_BUILTIN(bpick1, "bpick", BPICK) ++ NDS32_BUILTIN(smar64_1, "smar64", SMAR64) ++ NDS32_BUILTIN(smsr64, "smsr64", SMSR64) ++ NDS32_BUILTIN(umar64_1, "umar64", UMAR64) ++ NDS32_BUILTIN(umsr64, "umsr64", UMSR64) ++ NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64) ++ NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64) ++ NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64) ++ NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64) ++ NDS32_BUILTIN(smalbb, "smalbb", SMALBB) ++ NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB) ++ NDS32_BUILTIN(smalbt, "smalbt", SMALBT) ++ NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT) ++ NDS32_BUILTIN(smaltt, "smaltt", SMALTT) ++ NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT) ++ NDS32_BUILTIN(smalda1, "smalda", SMALDA) ++ NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA) ++ NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA) ++ NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA) ++ NDS32_BUILTIN(smalds1, "smalds", SMALDS) ++ NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS) ++ NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS) ++ NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS) ++ NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS) ++ NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS) ++ NDS32_BUILTIN(smslda1, "smslda", SMSLDA) ++ NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA) ++ NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA) ++ NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA) ++ NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB) ++ NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB) ++ NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U) ++ NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U) ++ NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT) ++ NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT) ++ NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U) ++ NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U) ++ NDS32_BUILTIN(kmmac, "kmmac", KMMAC) ++ NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U) ++ NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB) ++ NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U) + }; + + /* Three-argument intrinsics with an immediate third argument. */ +@@ -634,6 +954,7 @@ + NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW) + NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W) + NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW) ++ NDS32_BUILTIN(insb, "insb", INSB) + }; + + /* Intrinsics that load a value. */ +@@ -676,6 +997,11 @@ + unsigned i; + struct builtin_description *d; + ++ if (!NDS32_EXT_DSP_P () ++ && fcode > NDS32_BUILTIN_DSP_BEGIN ++ && fcode < NDS32_BUILTIN_DSP_END) ++ error ("don't support DSP extension instructions"); ++ + switch (fcode) + { + /* FPU Register Transfer. */ +@@ -812,6 +1138,9 @@ + case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL: + emit_insn (gen_cctl_l1d_wball_one_lvl()); + return target; ++ case NDS32_BUILTIN_CLROV: ++ emit_insn (gen_unspec_volatile_clrov ()); ++ return target; + case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT: + emit_insn (gen_unspec_standby_no_wake_grant ()); + return target; +@@ -947,10 +1276,18 @@ + NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) + + /* Looking for return type and argument can be found in tree.h file. */ ++ tree ptr_char_type_node = build_pointer_type (char_type_node); + tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node); + tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node); ++ tree ptr_short_type_node = build_pointer_type (short_integer_type_node); + tree ptr_uint_type_node = build_pointer_type (unsigned_type_node); + tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node); ++ tree v4qi_type_node = build_vector_type (intQI_type_node, 4); ++ tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4); ++ tree v2hi_type_node = build_vector_type (intHI_type_node, 2); ++ tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2); ++ tree v2si_type_node = build_vector_type (intSI_type_node, 2); ++ tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2); + + /* Cache. */ + ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC); +@@ -1050,6 +1387,31 @@ + ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM); + ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM); + ++ /* SATURATION */ ++ ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW); ++ ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW); ++ ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH); ++ ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH); ++ ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB); ++ ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB); ++ ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT); ++ ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT); ++ ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB); ++ ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB); ++ ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT); ++ ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT); ++ ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB); ++ ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB); ++ ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT); ++ ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT); ++ ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB); ++ ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB); ++ ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT); ++ ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT); ++ ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW); ++ ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U); ++ ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV); ++ ADD_NDS32_BUILTIN0 ("clrov", void, CLROV); + + /* ROTR */ + ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR); +@@ -1109,4 +1471,384 @@ + ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED); + ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED); + ++ /* DSP Extension: SIMD 16bit Add and Subtract. */ ++ ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16); ++ ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16); ++ ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16); ++ ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16); ++ ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16); ++ ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16); ++ ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16); ++ ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16); ++ ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16); ++ ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16); ++ ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16); ++ ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16); ++ ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16); ++ ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16); ++ ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16); ++ ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16); ++ ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16); ++ ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16); ++ ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16); ++ ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16); ++ ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16); ++ ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16); ++ ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16); ++ ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16); ++ ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16); ++ ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, V_URCRAS16); ++ ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16); ++ ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16); ++ ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16); ++ ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16); ++ ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16); ++ ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16); ++ ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16); ++ ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16); ++ ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16); ++ ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16); ++ ++ /* DSP Extension: SIMD 8bit Add and Subtract. */ ++ ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8); ++ ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8); ++ ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8); ++ ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8); ++ ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8); ++ ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8); ++ ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8); ++ ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8); ++ ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8); ++ ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8); ++ ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8); ++ ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8); ++ ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8); ++ ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8); ++ ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8); ++ ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8); ++ ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8); ++ ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8); ++ ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8); ++ ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8); ++ ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8); ++ ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8); ++ ++ /* DSP Extension: SIMD 16bit Shift. */ ++ ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16); ++ ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16); ++ ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U); ++ ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U); ++ ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16); ++ ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16); ++ ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U); ++ ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U); ++ ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16); ++ ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16); ++ ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16); ++ ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16); ++ ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16); ++ ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16); ++ ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U); ++ ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U); ++ ++ /* DSP Extension: 16bit Compare. */ ++ ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16); ++ ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16); ++ ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16); ++ ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16); ++ ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16); ++ ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16); ++ ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16); ++ ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16); ++ ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16); ++ ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16); ++ ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16); ++ ++ /* DSP Extension: 8bit Compare. */ ++ ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8); ++ ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8); ++ ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8); ++ ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8); ++ ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8); ++ ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8); ++ ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8); ++ ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8); ++ ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8); ++ ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8); ++ ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8); ++ ++ /* DSP Extension: SIMD 16bit MISC. */ ++ ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16); ++ ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16); ++ ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16); ++ ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16); ++ ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16); ++ ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16); ++ ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16); ++ ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16); ++ ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16); ++ ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16); ++ ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16); ++ ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16); ++ ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16); ++ ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16); ++ ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16); ++ ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16); ++ ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16); ++ ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16); ++ ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16); ++ ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16); ++ ADD_NDS32_BUILTIN2 ("smulx16", ++ long_long_unsigned, unsigned, unsigned, SMULX16); ++ ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16); ++ ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16); ++ ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16); ++ ADD_NDS32_BUILTIN2 ("umulx16", ++ long_long_unsigned, unsigned, unsigned, UMULX16); ++ ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16); ++ ++ /* DSP Extension: SIMD 8bit MISC. */ ++ ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8); ++ ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8); ++ ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8); ++ ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8); ++ ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8); ++ ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8); ++ ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8); ++ ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8); ++ ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8); ++ ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8); ++ ++ /* DSP Extension: 8bit Unpacking. */ ++ ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810); ++ ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820); ++ ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830); ++ ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831); ++ ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831); ++ ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810); ++ ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820); ++ ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830); ++ ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831); ++ ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831); ++ ++ /* DSP Extension: 32bit Add and Subtract. */ ++ ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW); ++ ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW); ++ ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW); ++ ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW); ++ ++ /* DSP Extension: 32bit Shift. */ ++ ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U); ++ ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL); ++ ++ /* DSP Extension: 16bit Packing. */ ++ ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16); ++ ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16); ++ ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16); ++ ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16); ++ ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16); ++ ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16); ++ ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16); ++ ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16); ++ ++ /* DSP Extension: Signed MSW 32x32 Multiply and ADD. */ ++ ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL); ++ ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U); ++ ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC); ++ ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U); ++ ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB); ++ ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U); ++ ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL); ++ ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U); ++ ++ /* DSP Extension: Most Significant Word 32x16 Multiply and ADD. */ ++ ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB); ++ ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB); ++ ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U); ++ ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U); ++ ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT); ++ ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT); ++ ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U); ++ ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U); ++ ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB); ++ ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB); ++ ADD_NDS32_BUILTIN3 ("kmmawb_u", ++ integer, integer, integer, unsigned, KMMAWB_U); ++ ADD_NDS32_BUILTIN3 ("v_kmmawb_u", ++ integer, integer, integer, v2hi, V_KMMAWB_U); ++ ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT); ++ ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT); ++ ADD_NDS32_BUILTIN3 ("kmmawt_u", ++ integer, integer, integer, unsigned, KMMAWT_U); ++ ADD_NDS32_BUILTIN3 ("v_kmmawt_u", ++ integer, integer, integer, v2hi, V_KMMAWT_U); ++ ++ /* DSP Extension: Signed 16bit Multiply with ADD/Subtract. */ ++ ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB); ++ ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB); ++ ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT); ++ ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT); ++ ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT); ++ ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT); ++ ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA); ++ ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA); ++ ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA); ++ ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA); ++ ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS); ++ ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS); ++ ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS); ++ ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS); ++ ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS); ++ ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS); ++ ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB); ++ ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB); ++ ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT); ++ ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT); ++ ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT); ++ ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT); ++ ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA); ++ ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA); ++ ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA); ++ ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA); ++ ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS); ++ ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS); ++ ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, unsigned, unsigned, KMADRS); ++ ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS); ++ ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS); ++ ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS); ++ ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA); ++ ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA); ++ ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA); ++ ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA); ++ ++ /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract. */ ++ ADD_NDS32_BUILTIN2 ("smal", long_long_integer, ++ long_long_integer, unsigned, SMAL); ++ ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer, ++ long_long_integer, v2hi, V_SMAL); ++ ++ /* DSP Extension: 32bit MISC. */ ++ ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV); ++ ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT); ++ ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK); ++ ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB); ++ ++ /* DSP Extension: 64bit Add and Subtract. */ ++ ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer, ++ long_long_integer, long_long_integer, SADD64); ++ ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, UADD64); ++ ADD_NDS32_BUILTIN2 ("radd64", long_long_integer, ++ long_long_integer, long_long_integer, RADD64); ++ ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, URADD64); ++ ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer, ++ long_long_integer, long_long_integer, KADD64); ++ ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, UKADD64); ++ ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer, ++ long_long_integer, long_long_integer, SSUB64); ++ ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, USUB64); ++ ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer, ++ long_long_integer, long_long_integer, RSUB64); ++ ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, URSUB64); ++ ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer, ++ long_long_integer, long_long_integer, KSUB64); ++ ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned, ++ long_long_unsigned, long_long_unsigned, UKSUB64); ++ ++ /* DSP Extension: 32bit Multiply with 64bit Add/Subtract. */ ++ ADD_NDS32_BUILTIN3 ("smar64", long_long_integer, ++ long_long_integer, integer, integer, SMAR64); ++ ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer, ++ long_long_integer, integer, integer, SMSR64); ++ ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UMAR64); ++ ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UMSR64); ++ ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer, ++ long_long_integer, integer, integer, KMAR64); ++ ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer, ++ long_long_integer, integer, integer, KMSR64); ++ ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UKMAR64); ++ ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned, ++ long_long_unsigned, unsigned, unsigned, UKMSR64); ++ ++ /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract. */ ++ ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALBB); ++ ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALBB); ++ ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALBT); ++ ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALBT); ++ ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALTT); ++ ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALTT); ++ ADD_NDS32_BUILTIN3 ("smalda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALDA); ++ ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALDA); ++ ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALXDA); ++ ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALXDA); ++ ADD_NDS32_BUILTIN3 ("smalds", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALDS); ++ ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALDS); ++ ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALDRS); ++ ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALDRS); ++ ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMALXDS); ++ ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMALXDS); ++ ADD_NDS32_BUILTIN3 ("smslda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMSLDA); ++ ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMSLDA); ++ ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer, ++ long_long_integer, unsigned, unsigned, SMSLXDA); ++ ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer, ++ long_long_integer, v2hi, v2hi, V_SMSLXDA); ++ ++ /* DSP Extension: augmented baseline. */ ++ ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32); ++ ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32); ++ ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS); ++ ++ /* DSP Extension: vector type unaligned Load/Store */ ++ ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16); ++ ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16); ++ ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8); ++ ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort, ++ u_v2hi, UASTORE_U16); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short, ++ v2hi, UASTORE_S16); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar, ++ u_v4qi, UASTORE_U8); ++ ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char, ++ v4qi, UASTORE_S8); + } +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32_intrinsic.h gcc-8.2.0/gcc/config/nds32/nds32_intrinsic.h +--- gcc-8.2.0.orig/gcc/config/nds32/nds32_intrinsic.h 2018-04-22 09:46:39.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32_intrinsic.h 2019-01-25 15:38:32.833242671 +0100 +@@ -26,6 +26,13 @@ + #ifndef _NDS32_INTRINSIC_H + #define _NDS32_INTRINSIC_H + ++typedef signed char int8x4_t __attribute ((vector_size(4))); ++typedef short int16x2_t __attribute ((vector_size(4))); ++typedef int int32x2_t __attribute__((vector_size(8))); ++typedef unsigned char uint8x4_t __attribute__ ((vector_size (4))); ++typedef unsigned short uint16x2_t __attribute__ ((vector_size (4))); ++typedef unsigned int uint32x2_t __attribute__((vector_size(8))); ++ + /* General instrinsic register names. */ + enum nds32_intrinsic_registers + { +@@ -691,6 +698,55 @@ + #define __nds32__tlbop_flua() \ + (__builtin_nds32_tlbop_flua()) + ++#define __nds32__kaddw(a, b) \ ++ (__builtin_nds32_kaddw ((a), (b))) ++#define __nds32__kaddh(a, b) \ ++ (__builtin_nds32_kaddh ((a), (b))) ++#define __nds32__ksubw(a, b) \ ++ (__builtin_nds32_ksubw ((a), (b))) ++#define __nds32__ksubh(a, b) \ ++ (__builtin_nds32_ksubh ((a), (b))) ++#define __nds32__kdmbb(a, b) \ ++ (__builtin_nds32_kdmbb ((a), (b))) ++#define __nds32__v_kdmbb(a, b) \ ++ (__builtin_nds32_v_kdmbb ((a), (b))) ++#define __nds32__kdmbt(a, b) \ ++ (__builtin_nds32_kdmbt ((a), (b))) ++#define __nds32__v_kdmbt(a, b) \ ++ (__builtin_nds32_v_kdmbt ((a), (b))) ++#define __nds32__kdmtb(a, b) \ ++ (__builtin_nds32_kdmtb ((a), (b))) ++#define __nds32__v_kdmtb(a, b) \ ++ (__builtin_nds32_v_kdmtb ((a), (b))) ++#define __nds32__kdmtt(a, b) \ ++ (__builtin_nds32_kdmtt ((a), (b))) ++#define __nds32__v_kdmtt(a, b) \ ++ (__builtin_nds32_v_kdmtt ((a), (b))) ++#define __nds32__khmbb(a, b) \ ++ (__builtin_nds32_khmbb ((a), (b))) ++#define __nds32__v_khmbb(a, b) \ ++ (__builtin_nds32_v_khmbb ((a), (b))) ++#define __nds32__khmbt(a, b) \ ++ (__builtin_nds32_khmbt ((a), (b))) ++#define __nds32__v_khmbt(a, b) \ ++ (__builtin_nds32_v_khmbt ((a), (b))) ++#define __nds32__khmtb(a, b) \ ++ (__builtin_nds32_khmtb ((a), (b))) ++#define __nds32__v_khmtb(a, b) \ ++ (__builtin_nds32_v_khmtb ((a), (b))) ++#define __nds32__khmtt(a, b) \ ++ (__builtin_nds32_khmtt ((a), (b))) ++#define __nds32__v_khmtt(a, b) \ ++ (__builtin_nds32_v_khmtt ((a), (b))) ++#define __nds32__kslraw(a, b) \ ++ (__builtin_nds32_kslraw ((a), (b))) ++#define __nds32__kslraw_u(a, b) \ ++ (__builtin_nds32_kslraw_u ((a), (b))) ++ ++#define __nds32__rdov() \ ++ (__builtin_nds32_rdov()) ++#define __nds32__clrov() \ ++ (__builtin_nds32_clrov()) + #define __nds32__gie_dis() \ + (__builtin_nds32_gie_dis()) + #define __nds32__gie_en() \ +@@ -720,10 +776,622 @@ + #define __nds32__get_trig_type(a) \ + (__builtin_nds32_get_trig_type ((a))) + ++#define __nds32__get_unaligned_hw(a) \ ++ (__builtin_nds32_unaligned_load_hw ((a))) ++#define __nds32__get_unaligned_w(a) \ ++ (__builtin_nds32_unaligned_load_w ((a))) ++#define __nds32__get_unaligned_dw(a) \ ++ (__builtin_nds32_unaligned_load_dw ((a))) ++#define __nds32__put_unaligned_hw(a, data) \ ++ (__builtin_nds32_unaligned_store_hw ((a), (data))) ++#define __nds32__put_unaligned_w(a, data) \ ++ (__builtin_nds32_unaligned_store_w ((a), (data))) ++#define __nds32__put_unaligned_dw(a, data) \ ++ (__builtin_nds32_unaligned_store_dw ((a), (data))) ++ ++#define __nds32__add16(a, b) \ ++ (__builtin_nds32_add16 ((a), (b))) ++#define __nds32__v_uadd16(a, b) \ ++ (__builtin_nds32_v_uadd16 ((a), (b))) ++#define __nds32__v_sadd16(a, b) \ ++ (__builtin_nds32_v_sadd16 ((a), (b))) ++#define __nds32__radd16(a, b) \ ++ (__builtin_nds32_radd16 ((a), (b))) ++#define __nds32__v_radd16(a, b) \ ++ (__builtin_nds32_v_radd16 ((a), (b))) ++#define __nds32__uradd16(a, b) \ ++ (__builtin_nds32_uradd16 ((a), (b))) ++#define __nds32__v_uradd16(a, b) \ ++ (__builtin_nds32_v_uradd16 ((a), (b))) ++#define __nds32__kadd16(a, b) \ ++ (__builtin_nds32_kadd16 ((a), (b))) ++#define __nds32__v_kadd16(a, b) \ ++ (__builtin_nds32_v_kadd16 ((a), (b))) ++#define __nds32__ukadd16(a, b) \ ++ (__builtin_nds32_ukadd16 ((a), (b))) ++#define __nds32__v_ukadd16(a, b) \ ++ (__builtin_nds32_v_ukadd16 ((a), (b))) ++#define __nds32__sub16(a, b) \ ++ (__builtin_nds32_sub16 ((a), (b))) ++#define __nds32__v_usub16(a, b) \ ++ (__builtin_nds32_v_usub16 ((a), (b))) ++#define __nds32__v_ssub16(a, b) \ ++ (__builtin_nds32_v_ssub16 ((a), (b))) ++#define __nds32__rsub16(a, b) \ ++ (__builtin_nds32_rsub16 ((a), (b))) ++#define __nds32__v_rsub16(a, b) \ ++ (__builtin_nds32_v_rsub16 ((a), (b))) ++#define __nds32__ursub16(a, b) \ ++ (__builtin_nds32_ursub16 ((a), (b))) ++#define __nds32__v_ursub16(a, b) \ ++ (__builtin_nds32_v_ursub16 ((a), (b))) ++#define __nds32__ksub16(a, b) \ ++ (__builtin_nds32_ksub16 ((a), (b))) ++#define __nds32__v_ksub16(a, b) \ ++ (__builtin_nds32_v_ksub16 ((a), (b))) ++#define __nds32__uksub16(a, b) \ ++ (__builtin_nds32_uksub16 ((a), (b))) ++#define __nds32__v_uksub16(a, b) \ ++ (__builtin_nds32_v_uksub16 ((a), (b))) ++#define __nds32__cras16(a, b) \ ++ (__builtin_nds32_cras16 ((a), (b))) ++#define __nds32__v_ucras16(a, b) \ ++ (__builtin_nds32_v_ucras16 ((a), (b))) ++#define __nds32__v_scras16(a, b) \ ++ (__builtin_nds32_v_scras16 ((a), (b))) ++#define __nds32__rcras16(a, b) \ ++ (__builtin_nds32_rcras16 ((a), (b))) ++#define __nds32__v_rcras16(a, b) \ ++ (__builtin_nds32_v_rcras16 ((a), (b))) ++#define __nds32__urcras16(a, b) \ ++ (__builtin_nds32_urcras16 ((a), (b))) ++#define __nds32__v_urcras16(a, b) \ ++ (__builtin_nds32_v_urcras16 ((a), (b))) ++#define __nds32__kcras16(a, b) \ ++ (__builtin_nds32_kcras16 ((a), (b))) ++#define __nds32__v_kcras16(a, b) \ ++ (__builtin_nds32_v_kcras16 ((a), (b))) ++#define __nds32__ukcras16(a, b) \ ++ (__builtin_nds32_ukcras16 ((a), (b))) ++#define __nds32__v_ukcras16(a, b) \ ++ (__builtin_nds32_v_ukcras16 ((a), (b))) ++#define __nds32__crsa16(a, b) \ ++ (__builtin_nds32_crsa16 ((a), (b))) ++#define __nds32__v_ucrsa16(a, b) \ ++ (__builtin_nds32_v_ucrsa16 ((a), (b))) ++#define __nds32__v_scrsa16(a, b) \ ++ (__builtin_nds32_v_scrsa16 ((a), (b))) ++#define __nds32__rcrsa16(a, b) \ ++ (__builtin_nds32_rcrsa16 ((a), (b))) ++#define __nds32__v_rcrsa16(a, b) \ ++ (__builtin_nds32_v_rcrsa16 ((a), (b))) ++#define __nds32__urcrsa16(a, b) \ ++ (__builtin_nds32_urcrsa16 ((a), (b))) ++#define __nds32__v_urcrsa16(a, b) \ ++ (__builtin_nds32_v_urcrsa16 ((a), (b))) ++#define __nds32__kcrsa16(a, b) \ ++ (__builtin_nds32_kcrsa16 ((a), (b))) ++#define __nds32__v_kcrsa16(a, b) \ ++ (__builtin_nds32_v_kcrsa16 ((a), (b))) ++#define __nds32__ukcrsa16(a, b) \ ++ (__builtin_nds32_ukcrsa16 ((a), (b))) ++#define __nds32__v_ukcrsa16(a, b) \ ++ (__builtin_nds32_v_ukcrsa16 ((a), (b))) ++ ++#define __nds32__add8(a, b) \ ++ (__builtin_nds32_add8 ((a), (b))) ++#define __nds32__v_uadd8(a, b) \ ++ (__builtin_nds32_v_uadd8 ((a), (b))) ++#define __nds32__v_sadd8(a, b) \ ++ (__builtin_nds32_v_sadd8 ((a), (b))) ++#define __nds32__radd8(a, b) \ ++ (__builtin_nds32_radd8 ((a), (b))) ++#define __nds32__v_radd8(a, b) \ ++ (__builtin_nds32_v_radd8 ((a), (b))) ++#define __nds32__uradd8(a, b) \ ++ (__builtin_nds32_uradd8 ((a), (b))) ++#define __nds32__v_uradd8(a, b) \ ++ (__builtin_nds32_v_uradd8 ((a), (b))) ++#define __nds32__kadd8(a, b) \ ++ (__builtin_nds32_kadd8 ((a), (b))) ++#define __nds32__v_kadd8(a, b) \ ++ (__builtin_nds32_v_kadd8 ((a), (b))) ++#define __nds32__ukadd8(a, b) \ ++ (__builtin_nds32_ukadd8 ((a), (b))) ++#define __nds32__v_ukadd8(a, b) \ ++ (__builtin_nds32_v_ukadd8 ((a), (b))) ++#define __nds32__sub8(a, b) \ ++ (__builtin_nds32_sub8 ((a), (b))) ++#define __nds32__v_usub8(a, b) \ ++ (__builtin_nds32_v_usub8 ((a), (b))) ++#define __nds32__v_ssub8(a, b) \ ++ (__builtin_nds32_v_ssub8 ((a), (b))) ++#define __nds32__rsub8(a, b) \ ++ (__builtin_nds32_rsub8 ((a), (b))) ++#define __nds32__v_rsub8(a, b) \ ++ (__builtin_nds32_v_rsub8 ((a), (b))) ++#define __nds32__ursub8(a, b) \ ++ (__builtin_nds32_ursub8 ((a), (b))) ++#define __nds32__v_ursub8(a, b) \ ++ (__builtin_nds32_v_ursub8 ((a), (b))) ++#define __nds32__ksub8(a, b) \ ++ (__builtin_nds32_ksub8 ((a), (b))) ++#define __nds32__v_ksub8(a, b) \ ++ (__builtin_nds32_v_ksub8 ((a), (b))) ++#define __nds32__uksub8(a, b) \ ++ (__builtin_nds32_uksub8 ((a), (b))) ++#define __nds32__v_uksub8(a, b) \ ++ (__builtin_nds32_v_uksub8 ((a), (b))) ++ ++#define __nds32__sra16(a, b) \ ++ (__builtin_nds32_sra16 ((a), (b))) ++#define __nds32__v_sra16(a, b) \ ++ (__builtin_nds32_v_sra16 ((a), (b))) ++#define __nds32__sra16_u(a, b) \ ++ (__builtin_nds32_sra16_u ((a), (b))) ++#define __nds32__v_sra16_u(a, b) \ ++ (__builtin_nds32_v_sra16_u ((a), (b))) ++#define __nds32__srl16(a, b) \ ++ (__builtin_nds32_srl16 ((a), (b))) ++#define __nds32__v_srl16(a, b) \ ++ (__builtin_nds32_v_srl16 ((a), (b))) ++#define __nds32__srl16_u(a, b) \ ++ (__builtin_nds32_srl16_u ((a), (b))) ++#define __nds32__v_srl16_u(a, b) \ ++ (__builtin_nds32_v_srl16_u ((a), (b))) ++#define __nds32__sll16(a, b) \ ++ (__builtin_nds32_sll16 ((a), (b))) ++#define __nds32__v_sll16(a, b) \ ++ (__builtin_nds32_v_sll16 ((a), (b))) ++#define __nds32__ksll16(a, b) \ ++ (__builtin_nds32_ksll16 ((a), (b))) ++#define __nds32__v_ksll16(a, b) \ ++ (__builtin_nds32_v_ksll16 ((a), (b))) ++#define __nds32__kslra16(a, b) \ ++ (__builtin_nds32_kslra16 ((a), (b))) ++#define __nds32__v_kslra16(a, b) \ ++ (__builtin_nds32_v_kslra16 ((a), (b))) ++#define __nds32__kslra16_u(a, b) \ ++ (__builtin_nds32_kslra16_u ((a), (b))) ++#define __nds32__v_kslra16_u(a, b) \ ++ (__builtin_nds32_v_kslra16_u ((a), (b))) ++ ++#define __nds32__cmpeq16(a, b) \ ++ (__builtin_nds32_cmpeq16 ((a), (b))) ++#define __nds32__v_scmpeq16(a, b) \ ++ (__builtin_nds32_v_scmpeq16 ((a), (b))) ++#define __nds32__v_ucmpeq16(a, b) \ ++ (__builtin_nds32_v_ucmpeq16 ((a), (b))) ++#define __nds32__scmplt16(a, b) \ ++ (__builtin_nds32_scmplt16 ((a), (b))) ++#define __nds32__v_scmplt16(a, b) \ ++ (__builtin_nds32_v_scmplt16 ((a), (b))) ++#define __nds32__scmple16(a, b) \ ++ (__builtin_nds32_scmple16 ((a), (b))) ++#define __nds32__v_scmple16(a, b) \ ++ (__builtin_nds32_v_scmple16 ((a), (b))) ++#define __nds32__ucmplt16(a, b) \ ++ (__builtin_nds32_ucmplt16 ((a), (b))) ++#define __nds32__v_ucmplt16(a, b) \ ++ (__builtin_nds32_v_ucmplt16 ((a), (b))) ++#define __nds32__ucmple16(a, b) \ ++ (__builtin_nds32_ucmple16 ((a), (b))) ++#define __nds32__v_ucmple16(a, b) \ ++ (__builtin_nds32_v_ucmple16 ((a), (b))) ++ ++#define __nds32__cmpeq8(a, b) \ ++ (__builtin_nds32_cmpeq8 ((a), (b))) ++#define __nds32__v_scmpeq8(a, b) \ ++ (__builtin_nds32_v_scmpeq8 ((a), (b))) ++#define __nds32__v_ucmpeq8(a, b) \ ++ (__builtin_nds32_v_ucmpeq8 ((a), (b))) ++#define __nds32__scmplt8(a, b) \ ++ (__builtin_nds32_scmplt8 ((a), (b))) ++#define __nds32__v_scmplt8(a, b) \ ++ (__builtin_nds32_v_scmplt8 ((a), (b))) ++#define __nds32__scmple8(a, b) \ ++ (__builtin_nds32_scmple8 ((a), (b))) ++#define __nds32__v_scmple8(a, b) \ ++ (__builtin_nds32_v_scmple8 ((a), (b))) ++#define __nds32__ucmplt8(a, b) \ ++ (__builtin_nds32_ucmplt8 ((a), (b))) ++#define __nds32__v_ucmplt8(a, b) \ ++ (__builtin_nds32_v_ucmplt8 ((a), (b))) ++#define __nds32__ucmple8(a, b) \ ++ (__builtin_nds32_ucmple8 ((a), (b))) ++#define __nds32__v_ucmple8(a, b) \ ++ (__builtin_nds32_v_ucmple8 ((a), (b))) ++ ++#define __nds32__smin16(a, b) \ ++ (__builtin_nds32_smin16 ((a), (b))) ++#define __nds32__v_smin16(a, b) \ ++ (__builtin_nds32_v_smin16 ((a), (b))) ++#define __nds32__umin16(a, b) \ ++ (__builtin_nds32_umin16 ((a), (b))) ++#define __nds32__v_umin16(a, b) \ ++ (__builtin_nds32_v_umin16 ((a), (b))) ++#define __nds32__smax16(a, b) \ ++ (__builtin_nds32_smax16 ((a), (b))) ++#define __nds32__v_smax16(a, b) \ ++ (__builtin_nds32_v_smax16 ((a), (b))) ++#define __nds32__umax16(a, b) \ ++ (__builtin_nds32_umax16 ((a), (b))) ++#define __nds32__v_umax16(a, b) \ ++ (__builtin_nds32_v_umax16 ((a), (b))) ++#define __nds32__sclip16(a, b) \ ++ (__builtin_nds32_sclip16 ((a), (b))) ++#define __nds32__v_sclip16(a, b) \ ++ (__builtin_nds32_v_sclip16 ((a), (b))) ++#define __nds32__uclip16(a, b) \ ++ (__builtin_nds32_uclip16 ((a), (b))) ++#define __nds32__v_uclip16(a, b) \ ++ (__builtin_nds32_v_uclip16 ((a), (b))) ++#define __nds32__khm16(a, b) \ ++ (__builtin_nds32_khm16 ((a), (b))) ++#define __nds32__v_khm16(a, b) \ ++ (__builtin_nds32_v_khm16 ((a), (b))) ++#define __nds32__khmx16(a, b) \ ++ (__builtin_nds32_khmx16 ((a), (b))) ++#define __nds32__v_khmx16(a, b) \ ++ (__builtin_nds32_v_khmx16 ((a), (b))) ++#define __nds32__kabs16(a) \ ++ (__builtin_nds32_kabs16 ((a))) ++#define __nds32__v_kabs16(a) \ ++ (__builtin_nds32_v_kabs16 ((a))) ++ ++#define __nds32__smin8(a, b) \ ++ (__builtin_nds32_smin8 ((a), (b))) ++#define __nds32__v_smin8(a, b) \ ++ (__builtin_nds32_v_smin8 ((a), (b))) ++#define __nds32__umin8(a, b) \ ++ (__builtin_nds32_umin8 ((a), (b))) ++#define __nds32__v_umin8(a, b) \ ++ (__builtin_nds32_v_umin8 ((a), (b))) ++#define __nds32__smax8(a, b) \ ++ (__builtin_nds32_smax8 ((a), (b))) ++#define __nds32__v_smax8(a, b) \ ++ (__builtin_nds32_v_smax8 ((a), (b))) ++#define __nds32__umax8(a, b) \ ++ (__builtin_nds32_umax8 ((a), (b))) ++#define __nds32__v_umax8(a, b) \ ++ (__builtin_nds32_v_umax8 ((a), (b))) ++#define __nds32__kabs8(a) \ ++ (__builtin_nds32_kabs8 ((a))) ++#define __nds32__v_kabs8(a) \ ++ (__builtin_nds32_v_kabs8 ((a))) ++ ++#define __nds32__sunpkd810(a) \ ++ (__builtin_nds32_sunpkd810 ((a))) ++#define __nds32__v_sunpkd810(a) \ ++ (__builtin_nds32_v_sunpkd810 ((a))) ++#define __nds32__sunpkd820(a) \ ++ (__builtin_nds32_sunpkd820 ((a))) ++#define __nds32__v_sunpkd820(a) \ ++ (__builtin_nds32_v_sunpkd820 ((a))) ++#define __nds32__sunpkd830(a) \ ++ (__builtin_nds32_sunpkd830 ((a))) ++#define __nds32__v_sunpkd830(a) \ ++ (__builtin_nds32_v_sunpkd830 ((a))) ++#define __nds32__sunpkd831(a) \ ++ (__builtin_nds32_sunpkd831 ((a))) ++#define __nds32__v_sunpkd831(a) \ ++ (__builtin_nds32_v_sunpkd831 ((a))) ++#define __nds32__zunpkd810(a) \ ++ (__builtin_nds32_zunpkd810 ((a))) ++#define __nds32__v_zunpkd810(a) \ ++ (__builtin_nds32_v_zunpkd810 ((a))) ++#define __nds32__zunpkd820(a) \ ++ (__builtin_nds32_zunpkd820 ((a))) ++#define __nds32__v_zunpkd820(a) \ ++ (__builtin_nds32_v_zunpkd820 ((a))) ++#define __nds32__zunpkd830(a) \ ++ (__builtin_nds32_zunpkd830 ((a))) ++#define __nds32__v_zunpkd830(a) \ ++ (__builtin_nds32_v_zunpkd830 ((a))) ++#define __nds32__zunpkd831(a) \ ++ (__builtin_nds32_zunpkd831 ((a))) ++#define __nds32__v_zunpkd831(a) \ ++ (__builtin_nds32_v_zunpkd831 ((a))) ++ ++#define __nds32__raddw(a, b) \ ++ (__builtin_nds32_raddw ((a), (b))) ++#define __nds32__uraddw(a, b) \ ++ (__builtin_nds32_uraddw ((a), (b))) ++#define __nds32__rsubw(a, b) \ ++ (__builtin_nds32_rsubw ((a), (b))) ++#define __nds32__ursubw(a, b) \ ++ (__builtin_nds32_ursubw ((a), (b))) ++ ++#define __nds32__sra_u(a, b) \ ++ (__builtin_nds32_sra_u ((a), (b))) ++#define __nds32__ksll(a, b) \ ++ (__builtin_nds32_ksll ((a), (b))) ++#define __nds32__pkbb16(a, b) \ ++ (__builtin_nds32_pkbb16 ((a), (b))) ++#define __nds32__v_pkbb16(a, b) \ ++ (__builtin_nds32_v_pkbb16 ((a), (b))) ++#define __nds32__pkbt16(a, b) \ ++ (__builtin_nds32_pkbt16 ((a), (b))) ++#define __nds32__v_pkbt16(a, b) \ ++ (__builtin_nds32_v_pkbt16 ((a), (b))) ++#define __nds32__pktb16(a, b) \ ++ (__builtin_nds32_pktb16 ((a), (b))) ++#define __nds32__v_pktb16(a, b) \ ++ (__builtin_nds32_v_pktb16 ((a), (b))) ++#define __nds32__pktt16(a, b) \ ++ (__builtin_nds32_pktt16 ((a), (b))) ++#define __nds32__v_pktt16(a, b) \ ++ (__builtin_nds32_v_pktt16 ((a), (b))) ++ ++#define __nds32__smmul(a, b) \ ++ (__builtin_nds32_smmul ((a), (b))) ++#define __nds32__smmul_u(a, b) \ ++ (__builtin_nds32_smmul_u ((a), (b))) ++#define __nds32__kmmac(r, a, b) \ ++ (__builtin_nds32_kmmac ((r), (a), (b))) ++#define __nds32__kmmac_u(r, a, b) \ ++ (__builtin_nds32_kmmac_u ((r), (a), (b))) ++#define __nds32__kmmsb(r, a, b) \ ++ (__builtin_nds32_kmmsb ((r), (a), (b))) ++#define __nds32__kmmsb_u(r, a, b) \ ++ (__builtin_nds32_kmmsb_u ((r), (a), (b))) ++#define __nds32__kwmmul(a, b) \ ++ (__builtin_nds32_kwmmul ((a), (b))) ++#define __nds32__kwmmul_u(a, b) \ ++ (__builtin_nds32_kwmmul_u ((a), (b))) ++ ++#define __nds32__smmwb(a, b) \ ++ (__builtin_nds32_smmwb ((a), (b))) ++#define __nds32__v_smmwb(a, b) \ ++ (__builtin_nds32_v_smmwb ((a), (b))) ++#define __nds32__smmwb_u(a, b) \ ++ (__builtin_nds32_smmwb_u ((a), (b))) ++#define __nds32__v_smmwb_u(a, b) \ ++ (__builtin_nds32_v_smmwb_u ((a), (b))) ++#define __nds32__smmwt(a, b) \ ++ (__builtin_nds32_smmwt ((a), (b))) ++#define __nds32__v_smmwt(a, b) \ ++ (__builtin_nds32_v_smmwt ((a), (b))) ++#define __nds32__smmwt_u(a, b) \ ++ (__builtin_nds32_smmwt_u ((a), (b))) ++#define __nds32__v_smmwt_u(a, b) \ ++ (__builtin_nds32_v_smmwt_u ((a), (b))) ++#define __nds32__kmmawb(r, a, b) \ ++ (__builtin_nds32_kmmawb ((r), (a), (b))) ++#define __nds32__v_kmmawb(r, a, b) \ ++ (__builtin_nds32_v_kmmawb ((r), (a), (b))) ++#define __nds32__kmmawb_u(r, a, b) \ ++ (__builtin_nds32_kmmawb_u ((r), (a), (b))) ++#define __nds32__v_kmmawb_u(r, a, b) \ ++ (__builtin_nds32_v_kmmawb_u ((r), (a), (b))) ++#define __nds32__kmmawt(r, a, b) \ ++ (__builtin_nds32_kmmawt ((r), (a), (b))) ++#define __nds32__v_kmmawt(r, a, b) \ ++ (__builtin_nds32_v_kmmawt ((r), (a), (b))) ++#define __nds32__kmmawt_u(r, a, b) \ ++ (__builtin_nds32_kmmawt_u ((r), (a), (b))) ++#define __nds32__v_kmmawt_u(r, a, b) \ ++ (__builtin_nds32_v_kmmawt_u ((r), (a), (b))) ++ ++#define __nds32__smbb(a, b) \ ++ (__builtin_nds32_smbb ((a), (b))) ++#define __nds32__v_smbb(a, b) \ ++ (__builtin_nds32_v_smbb ((a), (b))) ++#define __nds32__smbt(a, b) \ ++ (__builtin_nds32_smbt ((a), (b))) ++#define __nds32__v_smbt(a, b) \ ++ (__builtin_nds32_v_smbt ((a), (b))) ++#define __nds32__smtt(a, b) \ ++ (__builtin_nds32_smtt ((a), (b))) ++#define __nds32__v_smtt(a, b) \ ++ (__builtin_nds32_v_smtt ((a), (b))) ++#define __nds32__kmda(a, b) \ ++ (__builtin_nds32_kmda ((a), (b))) ++#define __nds32__v_kmda(a, b) \ ++ (__builtin_nds32_v_kmda ((a), (b))) ++#define __nds32__kmxda(a, b) \ ++ (__builtin_nds32_kmxda ((a), (b))) ++#define __nds32__v_kmxda(a, b) \ ++ (__builtin_nds32_v_kmxda ((a), (b))) ++#define __nds32__smds(a, b) \ ++ (__builtin_nds32_smds ((a), (b))) ++#define __nds32__v_smds(a, b) \ ++ (__builtin_nds32_v_smds ((a), (b))) ++#define __nds32__smdrs(a, b) \ ++ (__builtin_nds32_smdrs ((a), (b))) ++#define __nds32__v_smdrs(a, b) \ ++ (__builtin_nds32_v_smdrs ((a), (b))) ++#define __nds32__smxds(a, b) \ ++ (__builtin_nds32_smxds ((a), (b))) ++#define __nds32__v_smxds(a, b) \ ++ (__builtin_nds32_v_smxds ((a), (b))) ++#define __nds32__kmabb(r, a, b) \ ++ (__builtin_nds32_kmabb ((r), (a), (b))) ++#define __nds32__v_kmabb(r, a, b) \ ++ (__builtin_nds32_v_kmabb ((r), (a), (b))) ++#define __nds32__kmabt(r, a, b) \ ++ (__builtin_nds32_kmabt ((r), (a), (b))) ++#define __nds32__v_kmabt(r, a, b) \ ++ (__builtin_nds32_v_kmabt ((r), (a), (b))) ++#define __nds32__kmatt(r, a, b) \ ++ (__builtin_nds32_kmatt ((r), (a), (b))) ++#define __nds32__v_kmatt(r, a, b) \ ++ (__builtin_nds32_v_kmatt ((r), (a), (b))) ++#define __nds32__kmada(r, a, b) \ ++ (__builtin_nds32_kmada ((r), (a), (b))) ++#define __nds32__v_kmada(r, a, b) \ ++ (__builtin_nds32_v_kmada ((r), (a), (b))) ++#define __nds32__kmaxda(r, a, b) \ ++ (__builtin_nds32_kmaxda ((r), (a), (b))) ++#define __nds32__v_kmaxda(r, a, b) \ ++ (__builtin_nds32_v_kmaxda ((r), (a), (b))) ++#define __nds32__kmads(r, a, b) \ ++ (__builtin_nds32_kmads ((r), (a), (b))) ++#define __nds32__v_kmads(r, a, b) \ ++ (__builtin_nds32_v_kmads ((r), (a), (b))) ++#define __nds32__kmadrs(r, a, b) \ ++ (__builtin_nds32_kmadrs ((r), (a), (b))) ++#define __nds32__v_kmadrs(r, a, b) \ ++ (__builtin_nds32_v_kmadrs ((r), (a), (b))) ++#define __nds32__kmaxds(r, a, b) \ ++ (__builtin_nds32_kmaxds ((r), (a), (b))) ++#define __nds32__v_kmaxds(r, a, b) \ ++ (__builtin_nds32_v_kmaxds ((r), (a), (b))) ++#define __nds32__kmsda(r, a, b) \ ++ (__builtin_nds32_kmsda ((r), (a), (b))) ++#define __nds32__v_kmsda(r, a, b) \ ++ (__builtin_nds32_v_kmsda ((r), (a), (b))) ++#define __nds32__kmsxda(r, a, b) \ ++ (__builtin_nds32_kmsxda ((r), (a), (b))) ++#define __nds32__v_kmsxda(r, a, b) \ ++ (__builtin_nds32_v_kmsxda ((r), (a), (b))) ++ ++#define __nds32__smal(a, b) \ ++ (__builtin_nds32_smal ((a), (b))) ++#define __nds32__v_smal(a, b) \ ++ (__builtin_nds32_v_smal ((a), (b))) ++ ++#define __nds32__bitrev(a, b) \ ++ (__builtin_nds32_bitrev ((a), (b))) ++#define __nds32__wext(a, b) \ ++ (__builtin_nds32_wext ((a), (b))) ++#define __nds32__bpick(r, a, b) \ ++ (__builtin_nds32_bpick ((r), (a), (b))) ++#define __nds32__insb(r, a, b) \ ++ (__builtin_nds32_insb ((r), (a), (b))) ++ ++#define __nds32__sadd64(a, b) \ ++ (__builtin_nds32_sadd64 ((a), (b))) ++#define __nds32__uadd64(a, b) \ ++ (__builtin_nds32_uadd64 ((a), (b))) ++#define __nds32__radd64(a, b) \ ++ (__builtin_nds32_radd64 ((a), (b))) ++#define __nds32__uradd64(a, b) \ ++ (__builtin_nds32_uradd64 ((a), (b))) ++#define __nds32__kadd64(a, b) \ ++ (__builtin_nds32_kadd64 ((a), (b))) ++#define __nds32__ukadd64(a, b) \ ++ (__builtin_nds32_ukadd64 ((a), (b))) ++#define __nds32__ssub64(a, b) \ ++ (__builtin_nds32_ssub64 ((a), (b))) ++#define __nds32__usub64(a, b) \ ++ (__builtin_nds32_usub64 ((a), (b))) ++#define __nds32__rsub64(a, b) \ ++ (__builtin_nds32_rsub64 ((a), (b))) ++#define __nds32__ursub64(a, b) \ ++ (__builtin_nds32_ursub64 ((a), (b))) ++#define __nds32__ksub64(a, b) \ ++ (__builtin_nds32_ksub64 ((a), (b))) ++#define __nds32__uksub64(a, b) \ ++ (__builtin_nds32_uksub64 ((a), (b))) ++ ++#define __nds32__smar64(r, a, b) \ ++ (__builtin_nds32_smar64 ((r), (a), (b))) ++#define __nds32__smsr64(r, a, b) \ ++ (__builtin_nds32_smsr64 ((r), (a), (b))) ++#define __nds32__umar64(r, a, b) \ ++ (__builtin_nds32_umar64 ((r), (a), (b))) ++#define __nds32__umsr64(r, a, b) \ ++ (__builtin_nds32_umsr64 ((r), (a), (b))) ++#define __nds32__kmar64(r, a, b) \ ++ (__builtin_nds32_kmar64 ((r), (a), (b))) ++#define __nds32__kmsr64(r, a, b) \ ++ (__builtin_nds32_kmsr64 ((r), (a), (b))) ++#define __nds32__ukmar64(r, a, b) \ ++ (__builtin_nds32_ukmar64 ((r), (a), (b))) ++#define __nds32__ukmsr64(r, a, b) \ ++ (__builtin_nds32_ukmsr64 ((r), (a), (b))) ++ ++#define __nds32__smalbb(r, a, b) \ ++ (__builtin_nds32_smalbb ((r), (a), (b))) ++#define __nds32__v_smalbb(r, a, b) \ ++ (__builtin_nds32_v_smalbb ((r), (a), (b))) ++#define __nds32__smalbt(r, a, b) \ ++ (__builtin_nds32_smalbt ((r), (a), (b))) ++#define __nds32__v_smalbt(r, a, b) \ ++ (__builtin_nds32_v_smalbt ((r), (a), (b))) ++#define __nds32__smaltt(r, a, b) \ ++ (__builtin_nds32_smaltt ((r), (a), (b))) ++#define __nds32__v_smaltt(r, a, b) \ ++ (__builtin_nds32_v_smaltt ((r), (a), (b))) ++#define __nds32__smalda(r, a, b) \ ++ (__builtin_nds32_smalda ((r), (a), (b))) ++#define __nds32__v_smalda(r, a, b) \ ++ (__builtin_nds32_v_smalda ((r), (a), (b))) ++#define __nds32__smalxda(r, a, b) \ ++ (__builtin_nds32_smalxda ((r), (a), (b))) ++#define __nds32__v_smalxda(r, a, b) \ ++ (__builtin_nds32_v_smalxda ((r), (a), (b))) ++#define __nds32__smalds(r, a, b) \ ++ (__builtin_nds32_smalds ((r), (a), (b))) ++#define __nds32__v_smalds(r, a, b) \ ++ (__builtin_nds32_v_smalds ((r), (a), (b))) ++#define __nds32__smaldrs(r, a, b) \ ++ (__builtin_nds32_smaldrs ((r), (a), (b))) ++#define __nds32__v_smaldrs(r, a, b) \ ++ (__builtin_nds32_v_smaldrs ((r), (a), (b))) ++#define __nds32__smalxds(r, a, b) \ ++ (__builtin_nds32_smalxds ((r), (a), (b))) ++#define __nds32__v_smalxds(r, a, b) \ ++ (__builtin_nds32_v_smalxds ((r), (a), (b))) ++#define __nds32__smslda(r, a, b) \ ++ (__builtin_nds32_smslda ((r), (a), (b))) ++#define __nds32__v_smslda(r, a, b) \ ++ (__builtin_nds32_v_smslda ((r), (a), (b))) ++#define __nds32__smslxda(r, a, b) \ ++ (__builtin_nds32_smslxda ((r), (a), (b))) ++#define __nds32__v_smslxda(r, a, b) \ ++ (__builtin_nds32_v_smslxda ((r), (a), (b))) ++ ++#define __nds32__smul16(a, b) \ ++ (__builtin_nds32_smul16 ((a), (b))) ++#define __nds32__v_smul16(a, b) \ ++ (__builtin_nds32_v_smul16 ((a), (b))) ++#define __nds32__smulx16(a, b) \ ++ (__builtin_nds32_smulx16 ((a), (b))) ++#define __nds32__v_smulx16(a, b) \ ++ (__builtin_nds32_v_smulx16 ((a), (b))) ++#define __nds32__umul16(a, b) \ ++ (__builtin_nds32_umul16 ((a), (b))) ++#define __nds32__v_umul16(a, b) \ ++ (__builtin_nds32_v_umul16 ((a), (b))) ++#define __nds32__umulx16(a, b) \ ++ (__builtin_nds32_umulx16 ((a), (b))) ++#define __nds32__v_umulx16(a, b) \ ++ (__builtin_nds32_v_umulx16 ((a), (b))) ++ ++#define __nds32__uclip32(a, imm) \ ++ (__builtin_nds32_uclip32 ((a), (imm))) ++#define __nds32__sclip32(a, imm) \ ++ (__builtin_nds32_sclip32 ((a), (imm))) ++#define __nds32__kabs(a) \ ++ (__builtin_nds32_kabs ((a))) ++ + #define __nds32__unaligned_feature() \ + (__builtin_nds32_unaligned_feature()) + #define __nds32__enable_unaligned() \ + (__builtin_nds32_enable_unaligned()) + #define __nds32__disable_unaligned() \ + (__builtin_nds32_disable_unaligned()) ++ ++#define __nds32__get_unaligned_u16x2(a) \ ++ (__builtin_nds32_get_unaligned_u16x2 ((a))) ++#define __nds32__get_unaligned_s16x2(a) \ ++ (__builtin_nds32_get_unaligned_s16x2 ((a))) ++#define __nds32__get_unaligned_u8x4(a) \ ++ (__builtin_nds32_get_unaligned_u8x4 ((a))) ++#define __nds32__get_unaligned_s8x4(a) \ ++ (__builtin_nds32_get_unaligned_s8x4 ((a))) ++ ++#define __nds32__put_unaligned_u16x2(a, data) \ ++ (__builtin_nds32_put_unaligned_u16x2 ((a), (data))) ++#define __nds32__put_unaligned_s16x2(a, data) \ ++ (__builtin_nds32_put_unaligned_s16x2 ((a), (data))) ++#define __nds32__put_unaligned_u8x4(a, data) \ ++ (__builtin_nds32_put_unaligned_u8x4 ((a), (data))) ++#define __nds32__put_unaligned_s8x4(a, data) \ ++ (__builtin_nds32_put_unaligned_s8x4 ((a), (data))) ++ ++#define NDS32ATTR_SIGNATURE __attribute__((signature)) ++ + #endif /* nds32_intrinsic.h */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-intrinsic.md gcc-8.2.0/gcc/config/nds32/nds32-intrinsic.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-intrinsic.md 2018-04-22 09:46:39.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-intrinsic.md 2019-01-25 15:38:32.825242648 +0100 +@@ -1037,6 +1037,187 @@ + (set_attr "length" "4")] + ) + ++;; SATURATION ++ ++(define_insn "unspec_kaddw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "kaddw\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ksubw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ss_minus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")))] ++ "" ++ "ksubw\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kaddh" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDH))] ++ "" ++ "kaddh\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ksubh" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBH))] ++ "" ++ "ksubh\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kaddh_dsp" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")) ++ (const_int 15)] UNSPEC_CLIPS))] ++ "NDS32_EXT_DSP_P ()" ++ "kaddh\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_ksubh_dsp" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")) ++ (const_int 15)] UNSPEC_CLIPS))] ++ "NDS32_EXT_DSP_P ()" ++ "ksubh\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmbb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))] ++ "" ++ "kdmbb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmbt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))] ++ "" ++ "kdmbt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmtb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))] ++ "" ++ "kdmtb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kdmtt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))] ++ "" ++ "kdmtt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmbb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))] ++ "" ++ "khmbb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmbt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))] ++ "" ++ "khmbt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmtb" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))] ++ "" ++ "khmtb\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_khmtt" ++ [(set (match_operand:V2HI 0 "register_operand" "=r") ++ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") ++ (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))] ++ "" ++ "khmtt\t%0, %1, %2" ++ [(set_attr "type" "mul") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kslraw" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))] ++ "" ++ "kslraw\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_kslrawu" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))] ++ "" ++ "kslraw.u\t%0, %1, %2" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_volatile_rdov" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))] ++ "" ++ "rdov\t%0" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ ++(define_insn "unspec_volatile_clrov" ++ [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)] ++ "" ++ "clrov" ++ [(set_attr "type" "misc") ++ (set_attr "length" "4")] ++) ++ + ;; System + + (define_insn "unspec_sva" +@@ -1415,22 +1596,17 @@ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_store (operands, DImode); + else +- emit_insn (gen_unaligned_store_dw (operands[0], operands[1])); ++ emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]), ++ operands[1])); + DONE; + }) + + (define_insn "unaligned_store_dw" +- [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) +- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] ++ [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw") ++ (unspec:DI [(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))] + "" + { +- rtx otherops[3]; +- otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); +- otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); +- otherops[2] = operands[0]; +- +- output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); +- return ""; ++ return nds32_output_smw_double_word (operands); + } + [(set_attr "type" "store") + (set_attr "length" "4")] +@@ -1495,4 +1671,15 @@ + DONE; + }) + ++;; abs alias kabs ++ ++(define_insn "unspec_kabs" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))] ++ "" ++ "kabs\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ + ;; ------------------------------------------------------------------------ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-isr.c gcc-8.2.0/gcc/config/nds32/nds32-isr.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-isr.c 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-isr.c 2019-01-25 15:38:32.829242659 +0100 +@@ -43,7 +43,260 @@ + We use an array to record essential information for each vector. */ + static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS]; + +-/* ------------------------------------------------------------------------ */ ++/* ------------------------------------------------------------- */ ++/* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to support following patterns: ++ ++ __attribute__((interrupt("XXX;YYY;id=ZZZ"))) ++ __attribute__((exception("XXX;YYY;id=ZZZ"))) ++ __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) ++ ++ We provide several functions to parse the strings. */ ++ ++static void ++nds32_interrupt_attribute_parse_string (const char *original_str, ++ const char *func_name, ++ unsigned int s_level) ++{ ++ char target_str[100]; ++ enum nds32_isr_save_reg save_reg; ++ enum nds32_isr_nested_type nested_type; ++ ++ char *save_all_regs_str, *save_caller_regs_str; ++ char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; ++ char *id_str, *value_str; ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ ++ /* 1. Detect 'save_all_regs' : NDS32_SAVE_ALL ++ 'save_caller_regs' : NDS32_PARTIAL_SAVE */ ++ save_all_regs_str = strstr (target_str, "save_all_regs"); ++ save_caller_regs_str = strstr (target_str, "save_caller_regs"); ++ ++ /* Note that if no argument is found, ++ use NDS32_PARTIAL_SAVE by default. */ ++ if (save_all_regs_str) ++ save_reg = NDS32_SAVE_ALL; ++ else if (save_caller_regs_str) ++ save_reg = NDS32_PARTIAL_SAVE; ++ else ++ save_reg = NDS32_PARTIAL_SAVE; ++ ++ /* 2. Detect 'nested' : NDS32_NESTED ++ 'not_nested' : NDS32_NOT_NESTED ++ 'ready_nested' : NDS32_NESTED_READY ++ 'critical' : NDS32_CRITICAL */ ++ nested_str = strstr (target_str, "nested"); ++ not_nested_str = strstr (target_str, "not_nested"); ++ ready_nested_str = strstr (target_str, "ready_nested"); ++ critical_str = strstr (target_str, "critical"); ++ ++ /* Note that if no argument is found, ++ use NDS32_NOT_NESTED by default. ++ Also, since 'not_nested' and 'ready_nested' both contains ++ 'nested' string, we check 'nested' with lowest priority. */ ++ if (not_nested_str) ++ nested_type = NDS32_NOT_NESTED; ++ else if (ready_nested_str) ++ nested_type = NDS32_NESTED_READY; ++ else if (nested_str) ++ nested_type = NDS32_NESTED; ++ else if (critical_str) ++ nested_type = NDS32_CRITICAL; ++ else ++ nested_type = NDS32_NOT_NESTED; ++ ++ /* 3. Traverse each id value and set corresponding information. */ ++ id_str = strstr (target_str, "id="); ++ ++ /* If user forgets to assign 'id', issue an error message. */ ++ if (id_str == NULL) ++ error ("require id argument in the string"); ++ /* Extract the value_str first. */ ++ id_str = strtok (id_str, "="); ++ value_str = strtok (NULL, ";"); ++ ++ /* Pick up the first id value token. */ ++ value_str = strtok (value_str, ","); ++ while (value_str != NULL) ++ { ++ int i; ++ i = atoi (value_str); ++ ++ /* For interrupt(0..63), the actual vector number is (9..72). */ ++ i = i + 9; ++ if (i < 9 || i > 72) ++ error ("invalid id value for interrupt attribute"); ++ ++ /* Setup nds32_isr_vectors[] array. */ ++ nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT; ++ strcpy (nds32_isr_vectors[i].func_name, func_name); ++ nds32_isr_vectors[i].save_reg = save_reg; ++ nds32_isr_vectors[i].nested_type = nested_type; ++ nds32_isr_vectors[i].security_level = s_level; ++ ++ /* Fetch next token. */ ++ value_str = strtok (NULL, ","); ++ } ++ ++ return; ++} ++ ++static void ++nds32_exception_attribute_parse_string (const char *original_str, ++ const char *func_name, ++ unsigned int s_level) ++{ ++ char target_str[100]; ++ enum nds32_isr_save_reg save_reg; ++ enum nds32_isr_nested_type nested_type; ++ ++ char *save_all_regs_str, *save_caller_regs_str; ++ char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; ++ char *id_str, *value_str; ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ ++ /* 1. Detect 'save_all_regs' : NDS32_SAVE_ALL ++ 'save_caller_regs' : NDS32_PARTIAL_SAVE */ ++ save_all_regs_str = strstr (target_str, "save_all_regs"); ++ save_caller_regs_str = strstr (target_str, "save_caller_regs"); ++ ++ /* Note that if no argument is found, ++ use NDS32_PARTIAL_SAVE by default. */ ++ if (save_all_regs_str) ++ save_reg = NDS32_SAVE_ALL; ++ else if (save_caller_regs_str) ++ save_reg = NDS32_PARTIAL_SAVE; ++ else ++ save_reg = NDS32_PARTIAL_SAVE; ++ ++ /* 2. Detect 'nested' : NDS32_NESTED ++ 'not_nested' : NDS32_NOT_NESTED ++ 'ready_nested' : NDS32_NESTED_READY ++ 'critical' : NDS32_CRITICAL */ ++ nested_str = strstr (target_str, "nested"); ++ not_nested_str = strstr (target_str, "not_nested"); ++ ready_nested_str = strstr (target_str, "ready_nested"); ++ critical_str = strstr (target_str, "critical"); ++ ++ /* Note that if no argument is found, ++ use NDS32_NOT_NESTED by default. ++ Also, since 'not_nested' and 'ready_nested' both contains ++ 'nested' string, we check 'nested' with lowest priority. */ ++ if (not_nested_str) ++ nested_type = NDS32_NOT_NESTED; ++ else if (ready_nested_str) ++ nested_type = NDS32_NESTED_READY; ++ else if (nested_str) ++ nested_type = NDS32_NESTED; ++ else if (critical_str) ++ nested_type = NDS32_CRITICAL; ++ else ++ nested_type = NDS32_NOT_NESTED; ++ ++ /* 3. Traverse each id value and set corresponding information. */ ++ id_str = strstr (target_str, "id="); ++ ++ /* If user forgets to assign 'id', issue an error message. */ ++ if (id_str == NULL) ++ error ("require id argument in the string"); ++ /* Extract the value_str first. */ ++ id_str = strtok (id_str, "="); ++ value_str = strtok (NULL, ";"); ++ ++ /* Pick up the first id value token. */ ++ value_str = strtok (value_str, ","); ++ while (value_str != NULL) ++ { ++ int i; ++ i = atoi (value_str); ++ ++ /* For exception(1..8), the actual vector number is (1..8). */ ++ if (i < 1 || i > 8) ++ error ("invalid id value for exception attribute"); ++ ++ /* Setup nds32_isr_vectors[] array. */ ++ nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION; ++ strcpy (nds32_isr_vectors[i].func_name, func_name); ++ nds32_isr_vectors[i].save_reg = save_reg; ++ nds32_isr_vectors[i].nested_type = nested_type; ++ nds32_isr_vectors[i].security_level = s_level; ++ ++ /* Fetch next token. */ ++ value_str = strtok (NULL, ","); ++ } ++ ++ return; ++} ++ ++static void ++nds32_reset_attribute_parse_string (const char *original_str, ++ const char *func_name) ++{ ++ char target_str[100]; ++ char *vectors_str, *nmi_str, *warm_str, *value_str; ++ ++ /* Deal with reset attribute. Its vector number is always 0. */ ++ nds32_isr_vectors[0].category = NDS32_ISR_RESET; ++ ++ ++ /* 1. Parse 'vectors=XXXX'. */ ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ vectors_str = strstr (target_str, "vectors="); ++ /* The total vectors = interrupt + exception numbers + reset. ++ There are 8 exception and 1 reset in nds32 architecture. ++ If user forgets to assign 'vectors', user default 16 interrupts. */ ++ if (vectors_str != NULL) ++ { ++ /* Extract the value_str. */ ++ vectors_str = strtok (vectors_str, "="); ++ value_str = strtok (NULL, ";"); ++ nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1; ++ } ++ else ++ nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1; ++ strcpy (nds32_isr_vectors[0].func_name, func_name); ++ ++ ++ /* 2. Parse 'nmi_func=YYYY'. */ ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ nmi_str = strstr (target_str, "nmi_func="); ++ if (nmi_str != NULL) ++ { ++ /* Extract the value_str. */ ++ nmi_str = strtok (nmi_str, "="); ++ value_str = strtok (NULL, ";"); ++ strcpy (nds32_isr_vectors[0].nmi_name, value_str); ++ } ++ ++ /* 3. Parse 'warm_func=ZZZZ'. */ ++ ++ /* Copy original string into a character array so that ++ the string APIs can handle it. */ ++ strcpy (target_str, original_str); ++ warm_str = strstr (target_str, "warm_func="); ++ if (warm_str != NULL) ++ { ++ /* Extract the value_str. */ ++ warm_str = strtok (warm_str, "="); ++ value_str = strtok (NULL, ";"); ++ strcpy (nds32_isr_vectors[0].warm_name, value_str); ++ } ++ ++ return; ++} ++/* ------------------------------------------------------------- */ + + /* A helper function to emit section head template. */ + static void +@@ -79,6 +332,15 @@ + char section_name[100]; + char symbol_name[100]; + ++ /* A critical isr does not need jump table section because ++ its behavior is not performed by two-level handler. */ ++ if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) ++ { ++ fprintf (asm_out_file, "\t! The vector %02d is a critical isr !\n", ++ vector_id); ++ return; ++ } ++ + /* Prepare jmptbl section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_jmptbl.%02d", vector_id); +@@ -99,7 +361,6 @@ + const char *c_str = "CATEGORY"; + const char *sr_str = "SR"; + const char *nt_str = "NT"; +- const char *vs_str = "VS"; + char first_level_handler_name[100]; + char section_name[100]; + char symbol_name[100]; +@@ -147,30 +408,47 @@ + case NDS32_NESTED_READY: + nt_str = "nr"; + break; ++ case NDS32_CRITICAL: ++ /* The critical isr is not performed by two-level handler. */ ++ nt_str = ""; ++ break; + } + +- /* Currently we have 4-byte or 16-byte size for each vector. +- If it is 4-byte, the first level handler name has suffix string "_4b". */ +- vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; +- + /* Now we can create first level handler name. */ +- snprintf (first_level_handler_name, sizeof (first_level_handler_name), +- "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str); ++ if (nds32_isr_vectors[vector_id].security_level == 0) ++ { ++ /* For security level 0, use normal first level handler name. */ ++ snprintf (first_level_handler_name, sizeof (first_level_handler_name), ++ "_nds32_%s_%s_%s", c_str, sr_str, nt_str); ++ } ++ else ++ { ++ /* For security level 1-3, use corresponding spl_1, spl_2, or spl_3. */ ++ snprintf (first_level_handler_name, sizeof (first_level_handler_name), ++ "_nds32_spl_%d", nds32_isr_vectors[vector_id].security_level); ++ } + + /* Prepare vector section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_vector.%02d", vector_id); + snprintf (symbol_name, sizeof (symbol_name), +- "_nds32_vector_%02d%s", vector_id, vs_str); ++ "_nds32_vector_%02d", vector_id); + + + /* Everything is ready. We can start emit vector section content. */ + nds32_emit_section_head_template (section_name, symbol_name, + floor_log2 (nds32_isr_vector_size), false); + +- /* According to the vector size, the instructions in the +- vector section may be different. */ +- if (nds32_isr_vector_size == 4) ++ /* First we check if it is a critical isr. ++ If so, jump to user handler directly; otherwise, the instructions ++ in the vector section may be different according to the vector size. */ ++ if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) ++ { ++ /* This block is for critical isr. Jump to user handler directly. */ ++ fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n", ++ nds32_isr_vectors[vector_id].func_name); ++ } ++ else if (nds32_isr_vector_size == 4) + { + /* This block is for 4-byte vector size. + Hardware $VID support is necessary and only one instruction +@@ -239,13 +517,11 @@ + { + unsigned int i; + unsigned int total_n_vectors; +- const char *vs_str; + char reset_handler_name[100]; + char section_name[100]; + char symbol_name[100]; + + total_n_vectors = nds32_isr_vectors[0].total_n_vectors; +- vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; + + fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n"); + +@@ -261,7 +537,7 @@ + /* Emit vector references. */ + fprintf (asm_out_file, "\t ! references to vector section entries\n"); + for (i = 0; i < total_n_vectors; i++) +- fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str); ++ fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i); + + /* Emit jmptbl_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00"); +@@ -275,9 +551,9 @@ + + /* Emit vector_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_vector.00"); +- snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str); ++ snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00"); + snprintf (reset_handler_name, sizeof (reset_handler_name), +- "_nds32_reset%s", vs_str); ++ "_nds32_reset"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, +@@ -323,12 +599,12 @@ + nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) + { + int save_all_p, partial_save_p; +- int nested_p, not_nested_p, nested_ready_p; ++ int nested_p, not_nested_p, nested_ready_p, critical_p; + int intr_p, excp_p, reset_p; + + /* Initialize variables. */ + save_all_p = partial_save_p = 0; +- nested_p = not_nested_p = nested_ready_p = 0; ++ nested_p = not_nested_p = nested_ready_p = critical_p = 0; + intr_p = excp_p = reset_p = 0; + + /* We must check at MOST one attribute to set save-reg. */ +@@ -347,8 +623,10 @@ + not_nested_p = 1; + if (lookup_attribute ("nested_ready", func_attrs)) + nested_ready_p = 1; ++ if (lookup_attribute ("critical", func_attrs)) ++ critical_p = 1; + +- if ((nested_p + not_nested_p + nested_ready_p) > 1) ++ if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1) + error ("multiple nested types attributes to function %qD", func_decl); + + /* We must check at MOST one attribute to +@@ -362,6 +640,17 @@ + + if ((intr_p + excp_p + reset_p) > 1) + error ("multiple interrupt attributes to function %qD", func_decl); ++ ++ /* Do not allow isr attributes under linux toolchain. */ ++ if (TARGET_LINUX_ABI && intr_p) ++ error ("cannot use interrupt attributes to function %qD " ++ "under linux toolchain", func_decl); ++ if (TARGET_LINUX_ABI && excp_p) ++ error ("cannot use exception attributes to function %qD " ++ "under linux toolchain", func_decl); ++ if (TARGET_LINUX_ABI && reset_p) ++ error ("cannot use reset attributes to function %qD " ++ "under linux toolchain", func_decl); + } + + /* Function to construct isr vectors information array. +@@ -373,15 +662,21 @@ + const char *func_name) + { + tree save_all, partial_save; +- tree nested, not_nested, nested_ready; ++ tree nested, not_nested, nested_ready, critical; + tree intr, excp, reset; + ++ tree secure; ++ tree security_level_list; ++ tree security_level; ++ unsigned int s_level; ++ + save_all = lookup_attribute ("save_all", func_attrs); + partial_save = lookup_attribute ("partial_save", func_attrs); + + nested = lookup_attribute ("nested", func_attrs); + not_nested = lookup_attribute ("not_nested", func_attrs); + nested_ready = lookup_attribute ("nested_ready", func_attrs); ++ critical = lookup_attribute ("critical", func_attrs); + + intr = lookup_attribute ("interrupt", func_attrs); + excp = lookup_attribute ("exception", func_attrs); +@@ -391,6 +686,63 @@ + if (!intr && !excp && !reset) + return; + ++ /* At first, we need to retrieve security level. */ ++ secure = lookup_attribute ("secure", func_attrs); ++ if (secure != NULL) ++ { ++ security_level_list = TREE_VALUE (secure); ++ security_level = TREE_VALUE (security_level_list); ++ s_level = TREE_INT_CST_LOW (security_level); ++ } ++ else ++ { ++ /* If there is no secure attribute, the security level is set by ++ nds32_isr_secure_level, which is controlled by -misr-secure=X option. ++ By default nds32_isr_secure_level should be 0. */ ++ s_level = nds32_isr_secure_level; ++ } ++ ++ /* ------------------------------------------------------------- */ ++ /* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to support following patterns: ++ ++ __attribute__((interrupt("XXX;YYY;id=ZZZ"))) ++ __attribute__((exception("XXX;YYY;id=ZZZ"))) ++ __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) ++ ++ If interrupt/exception/reset appears and its argument is a ++ STRING_CST, we will parse string with some auxiliary functions ++ which set necessary isr information in the nds32_isr_vectors[] array. ++ After that, we can return immediately to avoid new-syntax isr ++ information construction. */ ++ if (intr != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) ++ { ++ tree string_arg = TREE_VALUE (TREE_VALUE (intr)); ++ nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg), ++ func_name, ++ s_level); ++ return; ++ } ++ if (excp != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) ++ { ++ tree string_arg = TREE_VALUE (TREE_VALUE (excp)); ++ nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg), ++ func_name, ++ s_level); ++ return; ++ } ++ if (reset != NULL_TREE ++ && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) ++ { ++ tree string_arg = TREE_VALUE (TREE_VALUE (reset)); ++ nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg), ++ func_name); ++ return; ++ } ++ /* ------------------------------------------------------------- */ ++ + /* If we are here, either we have interrupt/exception, + or reset attribute. */ + if (intr || excp) +@@ -417,6 +769,9 @@ + /* Add vector_number_offset to get actual vector number. */ + vector_id = TREE_INT_CST_LOW (id) + vector_number_offset; + ++ /* Set security level. */ ++ nds32_isr_vectors[vector_id].security_level = s_level; ++ + /* Enable corresponding vector and set function name. */ + nds32_isr_vectors[vector_id].category = (intr) + ? (NDS32_ISR_INTERRUPT) +@@ -436,6 +791,8 @@ + nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED; + else if (nested_ready) + nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY; ++ else if (critical) ++ nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL; + + /* Advance to next id. */ + id_list = TREE_CHAIN (id_list); +@@ -492,7 +849,6 @@ + } + } + +-/* A helper function to handle isr stuff at the beginning of asm file. */ + void + nds32_asm_file_start_for_isr (void) + { +@@ -505,15 +861,14 @@ + strcpy (nds32_isr_vectors[i].func_name, ""); + nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE; + nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED; ++ nds32_isr_vectors[i].security_level = 0; + nds32_isr_vectors[i].total_n_vectors = 0; + strcpy (nds32_isr_vectors[i].nmi_name, ""); + strcpy (nds32_isr_vectors[i].warm_name, ""); + } + } + +-/* A helper function to handle isr stuff at the end of asm file. */ +-void +-nds32_asm_file_end_for_isr (void) ++void nds32_asm_file_end_for_isr (void) + { + int i; + +@@ -547,6 +902,8 @@ + /* Found one vector which is interupt or exception. + Output its jmptbl and vector section content. */ + fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i); ++ fprintf (asm_out_file, "\t! security level: %d\n", ++ nds32_isr_vectors[i].security_level); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + nds32_emit_isr_jmptbl_section (i); + fprintf (asm_out_file, "\t! ....................................\n"); +@@ -580,4 +937,65 @@ + || (t_reset != NULL_TREE)); + } + +-/* ------------------------------------------------------------------------ */ ++/* Return true if FUNC is a isr function with critical attribute. */ ++bool ++nds32_isr_function_critical_p (tree func) ++{ ++ tree t_intr; ++ tree t_excp; ++ tree t_critical; ++ ++ tree attrs; ++ ++ if (TREE_CODE (func) != FUNCTION_DECL) ++ abort (); ++ ++ attrs = DECL_ATTRIBUTES (func); ++ ++ t_intr = lookup_attribute ("interrupt", attrs); ++ t_excp = lookup_attribute ("exception", attrs); ++ ++ t_critical = lookup_attribute ("critical", attrs); ++ ++ /* If both interrupt and exception attribute does not appear, ++ we can return false immediately. */ ++ if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE)) ++ return false; ++ ++ /* Here we can guarantee either interrupt or ecxception attribute ++ does exist, so further check critical attribute. ++ If it also appears, we can return true. */ ++ if (t_critical != NULL_TREE) ++ return true; ++ ++ /* ------------------------------------------------------------- */ ++ /* FIXME: ++ FOR BACKWARD COMPATIBILITY, we need to handle string type. ++ If the string 'critical' appears in the interrupt/exception ++ string argument, we can return true. */ ++ if (t_intr != NULL_TREE || t_excp != NULL_TREE) ++ { ++ char target_str[100]; ++ char *critical_str; ++ tree t_check; ++ tree string_arg; ++ ++ t_check = t_intr ? t_intr : t_excp; ++ if (TREE_CODE (TREE_VALUE (TREE_VALUE (t_check))) == STRING_CST) ++ { ++ string_arg = TREE_VALUE (TREE_VALUE (t_check)); ++ strcpy (target_str, TREE_STRING_POINTER (string_arg)); ++ critical_str = strstr (target_str, "critical"); ++ ++ /* Found 'critical' string, so return true. */ ++ if (critical_str) ++ return true; ++ } ++ } ++ /* ------------------------------------------------------------- */ ++ ++ /* Other cases, this isr function is not critical type. */ ++ return false; ++} ++ ++/* ------------------------------------------------------------- */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32_isr.h gcc-8.2.0/gcc/config/nds32/nds32_isr.h +--- gcc-8.2.0.orig/gcc/config/nds32/nds32_isr.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32_isr.h 2019-01-25 15:38:32.833242671 +0100 +@@ -0,0 +1,526 @@ ++/* Intrinsic definitions of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2018 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef _NDS32_ISR_H ++#define _NDS32_ISR_H ++ ++/* Attribute of a interrupt or exception handler: ++ ++ NDS32_READY_NESTED: This handler is interruptible if user re-enable GIE bit. ++ NDS32_NESTED : This handler is interruptible. This is not suitable ++ exception handler. ++ NDS32_NOT_NESTED : This handler is NOT interruptible. Users have to do ++ some work if nested is wanted ++ NDS32_CRITICAL : This handler is critical ISR, which means it is small ++ and efficient. */ ++#define NDS32_READY_NESTED 0 ++#define NDS32_NESTED 1 ++#define NDS32_NOT_NESTED 2 ++#define NDS32_CRITICAL 3 ++ ++/* Attribute of a interrupt or exception handler: ++ ++ NDS32_SAVE_ALL_REGS : Save all registers in a table. ++ NDS32_SAVE_PARTIAL_REGS: Save partial registers. */ ++#define NDS32_SAVE_CALLER_REGS 0 ++#define NDS32_SAVE_ALL_REGS 1 ++ ++/* There are two version of Register table for interrupt and exception handler, ++ one for 16-register CPU the other for 32-register CPU. These structures are ++ used for context switching or system call handling. The address of this ++ data can be get from the input argument of the handler functions. ++ ++ For system call handling, r0 to r5 are used to pass arguments. If more ++ arguments are used they are put into the stack and its starting address is ++ in sp. Return value of system call can be put into r0 and r1 upon exit from ++ system call handler. System call ID is in a system register and it can be ++ fetched via intrinsic function. For more information please read ABI and ++ other related documents. ++ ++ For context switching, at least 2 values need to saved in kernel. One is ++ IPC and the other is the stack address of current task. Use intrinsic ++ function to get IPC and the input argument of the handler functions + 8 to ++ get stack address of current task. To do context switching, you replace ++ new_sp with the stack address of new task and replace IPC system register ++ with IPC of new task, then, just return from handler. The context switching ++ will happen. */ ++ ++/* Register table for exception handler; 32-register version. */ ++typedef struct ++{ ++ int r0; ++ int r1; ++ int r2; ++ int r3; ++ int r4; ++ int r5; ++ int r6; ++ int r7; ++ int r8; ++ int r9; ++ int r10; ++ int r11; ++ int r12; ++ int r13; ++ int r14; ++ int r15; ++ int r16; ++ int r17; ++ int r18; ++ int r19; ++ int r20; ++ int r21; ++ int r22; ++ int r23; ++ int r24; ++ int r25; ++ int r26; ++ int r27; ++ int fp; ++ int gp; ++ int lp; ++ int sp; ++} NDS32_GPR32; ++ ++/* Register table for exception handler; 16-register version. */ ++typedef struct ++{ ++ int r0; ++ int r1; ++ int r2; ++ int r3; ++ int r4; ++ int r5; ++ int r6; ++ int r7; ++ int r8; ++ int r9; ++ int r10; ++ int r15; ++ int fp; ++ int gp; ++ int lp; ++ int sp; ++} NDS32_GPR16; ++ ++ ++/* Use NDS32_REG32_TAB or NDS32_REG16_TAB in your program to ++ access register table. */ ++typedef struct ++{ ++ union ++ { ++ int reg_a[32] ; ++ NDS32_GPR32 reg_s ; ++ } u ; ++} NDS32_REG32_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ int reg_a[16] ; ++ NDS32_GPR16 reg_s ; ++ } u ; ++} NDS32_REG16_TAB; ++ ++typedef struct ++{ ++ int d0lo; ++ int d0hi; ++ int d1lo; ++ int d1hi; ++} NDS32_DX_TAB; ++ ++typedef struct ++{ ++#ifdef __NDS32_EB__ ++ float fsr0; ++ float fsr1; ++ float fsr2; ++ float fsr3; ++ float fsr4; ++ float fsr5; ++ float fsr6; ++ float fsr7; ++#else ++ float fsr1; ++ float fsr0; ++ float fsr3; ++ float fsr2; ++ float fsr5; ++ float fsr4; ++ float fsr7; ++ float fsr6; ++#endif ++} NDS32_FSR8; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++} NDS32_DSR4; ++ ++typedef struct ++{ ++#ifdef __NDS32_EB__ ++ float fsr0; ++ float fsr1; ++ float fsr2; ++ float fsr3; ++ float fsr4; ++ float fsr5; ++ float fsr6; ++ float fsr7; ++ float fsr8; ++ float fsr9; ++ float fsr10; ++ float fsr11; ++ float fsr12; ++ float fsr13; ++ float fsr14; ++ float fsr15; ++#else ++ float fsr1; ++ float fsr0; ++ float fsr3; ++ float fsr2; ++ float fsr5; ++ float fsr4; ++ float fsr7; ++ float fsr6; ++ float fsr9; ++ float fsr8; ++ float fsr11; ++ float fsr10; ++ float fsr13; ++ float fsr12; ++ float fsr15; ++ float fsr14; ++#endif ++} NDS32_FSR16; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++ double dsr4; ++ double dsr5; ++ double dsr6; ++ double dsr7; ++} NDS32_DSR8; ++ ++typedef struct ++{ ++#ifdef __NDS32_EB__ ++ float fsr0; ++ float fsr1; ++ float fsr2; ++ float fsr3; ++ float fsr4; ++ float fsr5; ++ float fsr6; ++ float fsr7; ++ float fsr8; ++ float fsr9; ++ float fsr10; ++ float fsr11; ++ float fsr12; ++ float fsr13; ++ float fsr14; ++ float fsr15; ++ float fsr16; ++ float fsr17; ++ float fsr18; ++ float fsr19; ++ float fsr20; ++ float fsr21; ++ float fsr22; ++ float fsr23; ++ float fsr24; ++ float fsr25; ++ float fsr26; ++ float fsr27; ++ float fsr28; ++ float fsr29; ++ float fsr30; ++ float fsr31; ++#else ++ float fsr1; ++ float fsr0; ++ float fsr3; ++ float fsr2; ++ float fsr5; ++ float fsr4; ++ float fsr7; ++ float fsr6; ++ float fsr9; ++ float fsr8; ++ float fsr11; ++ float fsr10; ++ float fsr13; ++ float fsr12; ++ float fsr15; ++ float fsr14; ++ float fsr17; ++ float fsr16; ++ float fsr19; ++ float fsr18; ++ float fsr21; ++ float fsr20; ++ float fsr23; ++ float fsr22; ++ float fsr25; ++ float fsr24; ++ float fsr27; ++ float fsr26; ++ float fsr29; ++ float fsr28; ++ float fsr31; ++ float fsr30; ++#endif ++} NDS32_FSR32; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++ double dsr4; ++ double dsr5; ++ double dsr6; ++ double dsr7; ++ double dsr8; ++ double dsr9; ++ double dsr10; ++ double dsr11; ++ double dsr12; ++ double dsr13; ++ double dsr14; ++ double dsr15; ++} NDS32_DSR16; ++ ++typedef struct ++{ ++ double dsr0; ++ double dsr1; ++ double dsr2; ++ double dsr3; ++ double dsr4; ++ double dsr5; ++ double dsr6; ++ double dsr7; ++ double dsr8; ++ double dsr9; ++ double dsr10; ++ double dsr11; ++ double dsr12; ++ double dsr13; ++ double dsr14; ++ double dsr15; ++ double dsr16; ++ double dsr17; ++ double dsr18; ++ double dsr19; ++ double dsr20; ++ double dsr21; ++ double dsr22; ++ double dsr23; ++ double dsr24; ++ double dsr25; ++ double dsr26; ++ double dsr27; ++ double dsr28; ++ double dsr29; ++ double dsr30; ++ double dsr31; ++} NDS32_DSR32; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR8 fsr_s ; ++ NDS32_DSR4 dsr_s ; ++ } u ; ++} NDS32_FPU8_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR16 fsr_s ; ++ NDS32_DSR8 dsr_s ; ++ } u ; ++} NDS32_FPU16_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR32 fsr_s ; ++ NDS32_DSR16 dsr_s ; ++ } u ; ++} NDS32_FPU32_TAB; ++ ++typedef struct ++{ ++ union ++ { ++ NDS32_FSR32 fsr_s ; ++ NDS32_DSR32 dsr_s ; ++ } u ; ++} NDS32_FPU64_TAB; ++ ++typedef struct ++{ ++ int ipc; ++ int ipsw; ++#if defined(NDS32_EXT_FPU_CONFIG_0) ++ NDS32_FPU8_TAB fpr; ++#elif defined(NDS32_EXT_FPU_CONFIG_1) ++ NDS32_FPU16_TAB fpr; ++#elif defined(NDS32_EXT_FPU_CONFIG_2) ++ NDS32_FPU32_TAB fpr; ++#elif defined(NDS32_EXT_FPU_CONFIG_3) ++ NDS32_FPU64_TAB fpr; ++#endif ++#if __NDS32_DX_REGS__ ++ NDS32_DX_TAB dxr; ++#endif ++#if __NDS32_EXT_IFC__ ++ int ifc_lp; ++ int filler; ++#endif ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS ++ NDS32_REG16_TAB gpr; ++#else ++ NDS32_REG32_TAB gpr; ++#endif ++} NDS32_CONTEXT; ++ ++/* Predefined Vector Definition. ++ ++ For IVIC Mode: 9 to 14 are for hardware interrupt ++ and 15 is for software interrupt. ++ For EVIC Mode: 9 to 72 are for hardware interrupt ++ and software interrupt can be routed to any one of them. ++ ++ You may want to define your hardware interrupts in the following way ++ for easy maintainance. ++ ++ IVIC mode: ++ #define MY_HW_IVIC_TIMER NDS32_VECTOR_INTERRUPT_HW0 + 1 ++ #define MY_HW_IVIC_USB NDS32_VECTOR_INTERRUPT_HW0 + 3 ++ EVIC mode: ++ #define MY_HW_EVIC_DMA NDS32_VECTOR_INTERRUPT_HW0 + 2 ++ #define MY_HW_EVIC_SWI NDS32_VECTOR_INTERRUPT_HW0 + 10 */ ++#define NDS32_VECTOR_RESET 0 ++#define NDS32_VECTOR_TLB_FILL 1 ++#define NDS32_VECTOR_PTE_NOT_PRESENT 2 ++#define NDS32_VECTOR_TLB_MISC 3 ++#define NDS32_VECTOR_TLB_VLPT_MISS 4 ++#define NDS32_VECTOR_MACHINE_ERROR 5 ++#define NDS32_VECTOR_DEBUG_RELATED 6 ++#define NDS32_VECTOR_GENERAL_EXCEPTION 7 ++#define NDS32_VECTOR_SYSCALL 8 ++#define NDS32_VECTOR_INTERRUPT_HW0 9 ++#define NDS32_VECTOR_INTERRUPT_HW1 10 ++#define NDS32_VECTOR_INTERRUPT_HW2 11 ++#define NDS32_VECTOR_INTERRUPT_HW3 12 ++#define NDS32_VECTOR_INTERRUPT_HW4 13 ++#define NDS32_VECTOR_INTERRUPT_HW5 14 ++#define NDS32_VECTOR_INTERRUPT_HW6 15 ++#define NDS32_VECTOR_SWI 15 /* THIS IS FOR IVIC MODE ONLY */ ++#define NDS32_VECTOR_INTERRUPT_HW7 16 ++#define NDS32_VECTOR_INTERRUPT_HW8 17 ++#define NDS32_VECTOR_INTERRUPT_HW9 18 ++#define NDS32_VECTOR_INTERRUPT_HW10 19 ++#define NDS32_VECTOR_INTERRUPT_HW11 20 ++#define NDS32_VECTOR_INTERRUPT_HW12 21 ++#define NDS32_VECTOR_INTERRUPT_HW13 22 ++#define NDS32_VECTOR_INTERRUPT_HW14 23 ++#define NDS32_VECTOR_INTERRUPT_HW15 24 ++#define NDS32_VECTOR_INTERRUPT_HW16 25 ++#define NDS32_VECTOR_INTERRUPT_HW17 26 ++#define NDS32_VECTOR_INTERRUPT_HW18 27 ++#define NDS32_VECTOR_INTERRUPT_HW19 28 ++#define NDS32_VECTOR_INTERRUPT_HW20 29 ++#define NDS32_VECTOR_INTERRUPT_HW21 30 ++#define NDS32_VECTOR_INTERRUPT_HW22 31 ++#define NDS32_VECTOR_INTERRUPT_HW23 32 ++#define NDS32_VECTOR_INTERRUPT_HW24 33 ++#define NDS32_VECTOR_INTERRUPT_HW25 34 ++#define NDS32_VECTOR_INTERRUPT_HW26 35 ++#define NDS32_VECTOR_INTERRUPT_HW27 36 ++#define NDS32_VECTOR_INTERRUPT_HW28 37 ++#define NDS32_VECTOR_INTERRUPT_HW29 38 ++#define NDS32_VECTOR_INTERRUPT_HW30 39 ++#define NDS32_VECTOR_INTERRUPT_HW31 40 ++#define NDS32_VECTOR_INTERRUPT_HW32 41 ++#define NDS32_VECTOR_INTERRUPT_HW33 42 ++#define NDS32_VECTOR_INTERRUPT_HW34 43 ++#define NDS32_VECTOR_INTERRUPT_HW35 44 ++#define NDS32_VECTOR_INTERRUPT_HW36 45 ++#define NDS32_VECTOR_INTERRUPT_HW37 46 ++#define NDS32_VECTOR_INTERRUPT_HW38 47 ++#define NDS32_VECTOR_INTERRUPT_HW39 48 ++#define NDS32_VECTOR_INTERRUPT_HW40 49 ++#define NDS32_VECTOR_INTERRUPT_HW41 50 ++#define NDS32_VECTOR_INTERRUPT_HW42 51 ++#define NDS32_VECTOR_INTERRUPT_HW43 52 ++#define NDS32_VECTOR_INTERRUPT_HW44 53 ++#define NDS32_VECTOR_INTERRUPT_HW45 54 ++#define NDS32_VECTOR_INTERRUPT_HW46 55 ++#define NDS32_VECTOR_INTERRUPT_HW47 56 ++#define NDS32_VECTOR_INTERRUPT_HW48 57 ++#define NDS32_VECTOR_INTERRUPT_HW49 58 ++#define NDS32_VECTOR_INTERRUPT_HW50 59 ++#define NDS32_VECTOR_INTERRUPT_HW51 60 ++#define NDS32_VECTOR_INTERRUPT_HW52 61 ++#define NDS32_VECTOR_INTERRUPT_HW53 62 ++#define NDS32_VECTOR_INTERRUPT_HW54 63 ++#define NDS32_VECTOR_INTERRUPT_HW55 64 ++#define NDS32_VECTOR_INTERRUPT_HW56 65 ++#define NDS32_VECTOR_INTERRUPT_HW57 66 ++#define NDS32_VECTOR_INTERRUPT_HW58 67 ++#define NDS32_VECTOR_INTERRUPT_HW59 68 ++#define NDS32_VECTOR_INTERRUPT_HW60 69 ++#define NDS32_VECTOR_INTERRUPT_HW61 70 ++#define NDS32_VECTOR_INTERRUPT_HW62 71 ++#define NDS32_VECTOR_INTERRUPT_HW63 72 ++ ++#define NDS32ATTR_RESET(option) __attribute__((reset(option))) ++#define NDS32ATTR_EXCEPT(type) __attribute__((exception(type))) ++#define NDS32ATTR_EXCEPTION(type) __attribute__((exception(type))) ++#define NDS32ATTR_INTERRUPT(type) __attribute__((interrupt(type))) ++#define NDS32ATTR_ISR(type) __attribute__((interrupt(type))) ++ ++#endif /* nds32_isr.h */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-linux.opt gcc-8.2.0/gcc/config/nds32/nds32-linux.opt +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-linux.opt 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-linux.opt 2019-01-25 15:38:32.829242659 +0100 +@@ -0,0 +1,16 @@ ++mcmodel= ++Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) ++Specify the address generation strategy for code model. ++ ++Enum ++Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) ++Known cmodel types (for use with the -mcmodel= option): ++ ++EnumValue ++Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) ++ ++EnumValue ++Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32.md gcc-8.2.0/gcc/config/nds32/nds32.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32.md 2018-04-08 11:21:30.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32.md 2019-01-25 15:38:32.833242671 +0100 +@@ -56,24 +56,29 @@ + ;; ------------------------------------------------------------------------ + + ;; CPU pipeline model. +-(define_attr "pipeline_model" "n7,n8,e8,n9,simple" ++(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,simple" + (const + (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") + (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") + (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") + (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") ++ (match_test "nds32_cpu_option == CPU_N10") (const_string "n10") ++ (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf") ++ (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") ++ (match_test "nds32_cpu_option == CPU_N13") (const_string "n13") + (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] + (const_string "n9")))) + + ;; Insn type, it is used to default other attribute values. + (define_attr "type" + "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\ +- falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore" ++ falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\ ++ dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext" + (const_string "unknown")) + + ;; Insn sub-type + (define_attr "subtype" +- "simple,shift" ++ "simple,shift,saturation" + (const_string "simple")) + + ;; Length, in bytes, default is 4-bytes. +@@ -133,6 +138,7 @@ + + ;; ---------------------------------------------------------------------------- + ++(include "nds32-dspext.md") + + ;; Move instructions. + +@@ -209,6 +215,27 @@ + low12_int)); + DONE; + } ++ ++ if ((REG_P (operands[0]) || GET_CODE (operands[0]) == SUBREG) ++ && SYMBOLIC_CONST_P (operands[1])) ++ { ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (operands[1])) ++ { ++ nds32_expand_ict_move (operands); ++ DONE; ++ } ++ else if (nds32_tls_referenced_p (operands [1])) ++ { ++ nds32_expand_tls_move (operands); ++ DONE; ++ } ++ else if (flag_pic) ++ { ++ nds32_expand_pic_move (operands); ++ DONE; ++ } ++ } + }) + + (define_insn "*mov<mode>" +@@ -271,8 +298,8 @@ + ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF + ;; are able to match such instruction template. + (define_insn "move_addr" +- [(set (match_operand:SI 0 "register_operand" "=l, r") +- (match_operand:SI 1 "nds32_symbolic_operand" " i, i"))] ++ [(set (match_operand:SI 0 "nds32_general_register_operand" "=l, r") ++ (match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))] + "" + "la\t%0, %1" + [(set_attr "type" "alu") +@@ -351,13 +378,58 @@ + + + ;; ---------------------------------------------------------------------------- ++(define_expand "extv" ++ [(set (match_operand 0 "register_operand" "") ++ (sign_extract (match_operand 1 "nonimmediate_operand" "") ++ (match_operand 2 "const_int_operand" "") ++ (match_operand 3 "const_int_operand" "")))] ++ "" ++{ ++ enum nds32_expand_result_type result = nds32_expand_extv (operands); ++ switch (result) ++ { ++ case EXPAND_DONE: ++ DONE; ++ break; ++ case EXPAND_FAIL: ++ FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++}) ++ ++(define_expand "insv" ++ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") ++ (match_operand 1 "const_int_operand" "") ++ (match_operand 2 "const_int_operand" "")) ++ (match_operand 3 "register_operand" ""))] ++ "" ++{ ++ enum nds32_expand_result_type result = nds32_expand_insv (operands); ++ switch (result) ++ { ++ case EXPAND_DONE: ++ DONE; ++ break; ++ case EXPAND_FAIL: ++ FAIL; ++ break; ++ case EXPAND_CREATE_TEMPLATE: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++}) + + ;; Arithmetic instructions. + + (define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") + (plus:SI (match_operand:SI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") +- (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,Iu06, Is15, r")))] ++ (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))] + "" + { + switch (which_alternative) +@@ -1428,11 +1500,30 @@ + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] + "" +- "" ++ { ++ rtx insn; ++ rtx sym = XEXP (operands[0], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[0] = gen_const_mem (Pmode, reg); ++ } ++ ++ if (flag_pic) ++ { ++ insn = emit_call_insn (gen_call_internal ++ (XEXP (operands[0], 0), GEN_INT (0))); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); ++ DONE; ++ } ++ } + ) + + (define_insn "call_internal" +- [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i")) ++ [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S")) + (match_operand 1)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] +@@ -1474,9 +1565,11 @@ + (const_int 2) + (const_int 4)) + ;; Alternative 1 +- (if_then_else (match_test "nds32_long_call_p (operands[0])") +- (const_int 12) +- (const_int 4)) ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[0])") ++ (const_int 12) ++ (const_int 4))) + ])] + ) + +@@ -1492,11 +1585,33 @@ + (match_operand 2))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] +- "") ++ "" ++ { ++ rtx insn; ++ rtx sym = XEXP (operands[1], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[1] = gen_const_mem (Pmode, reg); ++ } ++ ++ if (flag_pic) ++ { ++ insn = ++ emit_call_insn (gen_call_value_internal ++ (operands[0], XEXP (operands[1], 0), GEN_INT (0))); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); ++ DONE; ++ } ++ } ++) + + (define_insn "call_value_internal" + [(parallel [(set (match_operand 0) +- (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i")) ++ (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))])] +@@ -1538,9 +1653,11 @@ + (const_int 2) + (const_int 4)) + ;; Alternative 1 +- (if_then_else (match_test "nds32_long_call_p (operands[1])") +- (const_int 12) +- (const_int 4)) ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[1])") ++ (const_int 12) ++ (const_int 4))) + ])] + ) + +@@ -1583,10 +1700,21 @@ + (const_int 0)) + (clobber (reg:SI TA_REGNUM)) + (return)])] +- "") ++ "" ++{ ++ rtx sym = XEXP (operands[0], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[0] = gen_const_mem (Pmode, reg); ++ } ++}) + + (define_insn "sibcall_internal" +- [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i")) ++ [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S")) + (match_operand 1)) + (clobber (reg:SI TA_REGNUM)) + (return)])] +@@ -1617,9 +1745,11 @@ + (const_int 2) + (const_int 4)) + ;; Alternative 1 +- (if_then_else (match_test "nds32_long_call_p (operands[0])") +- (const_int 12) +- (const_int 4)) ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[0])") ++ (const_int 12) ++ (const_int 4))) + ])] + ) + +@@ -1633,11 +1763,22 @@ + (const_int 0))) + (clobber (reg:SI TA_REGNUM)) + (return)])] +- "") ++ "" ++{ ++ rtx sym = XEXP (operands[1], 0); ++ ++ if (TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (sym)) ++ { ++ rtx reg = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, sym); ++ operands[1] = gen_const_mem (Pmode, reg); ++ } ++}) + + (define_insn "sibcall_value_internal" + [(parallel [(set (match_operand 0) +- (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i")) ++ (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S")) + (match_operand 2))) + (clobber (reg:SI TA_REGNUM)) + (return)])] +@@ -1668,9 +1809,11 @@ + (const_int 2) + (const_int 4)) + ;; Alternative 1 +- (if_then_else (match_test "nds32_long_call_p (operands[1])") +- (const_int 12) +- (const_int 4)) ++ (if_then_else (match_test "flag_pic") ++ (const_int 16) ++ (if_then_else (match_test "nds32_long_call_p (operands[1])") ++ (const_int 12) ++ (const_int 4))) + ])] + ) + +@@ -1687,12 +1830,33 @@ + nds32_expand_prologue_v3push (); + else + nds32_expand_prologue (); ++ ++ /* If cfun->machine->fp_as_gp_p is true, we can generate special ++ directive to guide linker doing fp-as-gp optimization. ++ However, for a naked function, which means ++ it should not have prologue/epilogue, ++ using fp-as-gp still requires saving $fp by push/pop behavior and ++ there is no benefit to use fp-as-gp on such small function. ++ So we need to make sure this function is NOT naked as well. */ ++ if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) ++ emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM))); ++ + DONE; + }) + + (define_expand "epilogue" [(const_int 0)] + "" + { ++ /* If cfun->machine->fp_as_gp_p is true, we can generate special ++ directive to guide linker doing fp-as-gp optimization. ++ However, for a naked function, which means ++ it should not have prologue/epilogue, ++ using fp-as-gp still requires saving $fp by push/pop behavior and ++ there is no benefit to use fp-as-gp on such small function. ++ So we need to make sure this function is NOT naked as well. */ ++ if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) ++ emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM))); ++ + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. + In addition, we need to check if v3push is indeed available. */ + if (NDS32_V3PUSH_AVAILABLE_P) +@@ -1792,7 +1956,8 @@ + "nds32_can_use_return_insn ()" + { + /* Emit as the simple return. */ +- if (cfun->machine->naked_p ++ if (!cfun->machine->fp_as_gp_p ++ && cfun->machine->naked_p + && (cfun->machine->va_args_size == 0)) + { + emit_jump_insn (gen_return_internal ()); +@@ -1802,9 +1967,14 @@ + + ;; This pattern is expanded only by the shrink-wrapping optimization + ;; on paths where the function prologue has not been executed. ++;; However, such optimization may reorder the prologue/epilogue blocks ++;; together with basic blocks within function body. ++;; So we must disable this pattern if we have already decided ++;; to perform fp_as_gp optimization, which requires prologue to be ++;; first block and epilogue to be last block. + (define_expand "simple_return" + [(simple_return)] +- "" ++ "!cfun->machine->fp_as_gp_p" + "" + ) + +@@ -1823,6 +1993,9 @@ + [(simple_return)] + "" + { ++ if (nds32_isr_function_critical_p (current_function_decl)) ++ return "iret"; ++ + if (TARGET_16_BIT) + return "ret5"; + else +@@ -1831,9 +2004,11 @@ + [(set_attr "type" "branch") + (set_attr "enabled" "yes") + (set (attr "length") +- (if_then_else (match_test "TARGET_16_BIT") +- (const_int 2) +- (const_int 4)))]) ++ (if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)") ++ (const_int 4) ++ (if_then_else (match_test "TARGET_16_BIT") ++ (const_int 2) ++ (const_int 4))))]) + + + ;; ---------------------------------------------------------------------------- +@@ -1868,6 +2043,7 @@ + { + rtx add_tmp; + rtx reg, test; ++ rtx tmp_reg; + + /* Step A: "k <-- (plus (operands[0]) (-operands[1]))". */ + if (operands[1] != const0_rtx) +@@ -1889,9 +2065,14 @@ + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], + operands[4])); + +- /* Step C, D, E, and F, using another temporary register. */ +- rtx tmp = gen_reg_rtx (SImode); +- emit_jump_insn (gen_casesi_internal (operands[0], operands[3], tmp)); ++ tmp_reg = gen_reg_rtx (SImode); ++ /* Step C, D, E, and F, using another temporary register tmp_reg. */ ++ if (flag_pic) ++ emit_use (pic_offset_table_rtx); ++ ++ emit_jump_insn (gen_casesi_internal (operands[0], ++ operands[3], ++ tmp_reg)); + DONE; + }) + +@@ -1927,13 +2108,30 @@ + else + return nds32_output_casesi (operands); + } +- [(set_attr "length" "20") +- (set_attr "type" "branch")]) ++ [(set_attr "type" "branch") ++ (set (attr "length") ++ (if_then_else (match_test "flag_pic") ++ (const_int 28) ++ (const_int 20)))]) + + ;; ---------------------------------------------------------------------------- + + ;; Performance Extension + ++; If -fwrapv option is issued, GCC expects there will be ++; signed overflow situation. So the ABS(INT_MIN) is still INT_MIN ++; (e.g. ABS(0x80000000)=0x80000000). ++; However, the hardware ABS instruction of nds32 target ++; always performs saturation: abs 0x80000000 -> 0x7fffffff. ++; So that we can only enable abssi2 pattern if flag_wrapv is NOT presented. ++(define_insn "abssi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (abs:SI (match_operand:SI 1 "register_operand" " r")))] ++ "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv" ++ "abs\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")]) ++ + (define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "register_operand" " r")))] +@@ -1996,6 +2194,25 @@ + [(set_attr "length" "0")] + ) + ++;; Output .omit_fp_begin for fp-as-gp optimization. ++;; Also we have to set $fp register. ++(define_insn "omit_fp_begin" ++ [(set (match_operand:SI 0 "register_operand" "=x") ++ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))] ++ "" ++ "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----" ++ [(set_attr "length" "8")] ++) ++ ++;; Output .omit_fp_end for fp-as-gp optimization. ++;; Claim that we have to use $fp register. ++(define_insn "omit_fp_end" ++ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)] ++ "" ++ "! -----\;.omit_fp_end\;! -----" ++ [(set_attr "length" "0")] ++) ++ + (define_insn "pop25return" + [(return) + (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)] +@@ -2004,6 +2221,36 @@ + [(set_attr "length" "0")] + ) + ++;; Add pc ++(define_insn "add_pc" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") ++ (pc)))] ++ "TARGET_LINUX_ABI || flag_pic" ++ "add5.pc\t%0" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++{ ++ emit_insn (gen_unspec_wsbh (operands[0], operands[1])); ++ emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16))); ++ DONE; ++}) ++ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r")))] ++ "" ++ "wsbh\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "4")] ++) ++ + ;; ---------------------------------------------------------------------------- + + ;; Patterns for exception handling +@@ -2068,3 +2315,57 @@ + }) + + ;; ---------------------------------------------------------------------------- ++ ++;; Patterns for TLS. ++;; The following two tls patterns don't be expanded directly because the ++;; intermediate value may be spilled into the stack. As a result, it is ++;; hard to analyze the define-use chain in the relax_opt pass. ++ ++ ++;; There is a unspec operand to record RELAX_GROUP number because each ++;; emitted instruction need a relax_hint above it. ++(define_insn "tls_desc" ++ [(set (reg:SI 0) ++ (call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC) ++ (const_int 1))) ++ (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) ++ (use (reg:SI GP_REGNUM)) ++ (clobber (reg:SI LP_REGNUM)) ++ (clobber (reg:SI TA_REGNUM))] ++ "" ++ { ++ return nds32_output_tls_desc (operands); ++ } ++ [(set_attr "length" "20") ++ (set_attr "type" "branch")] ++) ++ ++;; There is a unspec operand to record RELAX_GROUP number because each ++;; emitted instruction need a relax_hint above it. ++(define_insn "tls_ie" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE)) ++ (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) ++ (use (reg:SI GP_REGNUM))] ++ "" ++ { ++ return nds32_output_tls_ie (operands); ++ } ++ [(set (attr "length") (if_then_else (match_test "flag_pic") ++ (const_int 12) ++ (const_int 8))) ++ (set_attr "type" "misc")] ++) ++ ++;; The pattern is for some relaxation groups that have to keep addsi3 in 32-bit mode. ++(define_insn "addsi3_32bit" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "%r") ++ (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))] ++ "" ++ "add\t%0, %1, %2"; ++ [(set_attr "type" "alu") ++ (set_attr "length" "4") ++ (set_attr "feature" "v1")]) ++ ++;; ---------------------------------------------------------------------------- +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-md-auxiliary.c gcc-8.2.0/gcc/config/nds32/nds32-md-auxiliary.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-md-auxiliary.c 2018-04-08 08:00:34.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-md-auxiliary.c 2019-01-25 15:38:32.829242659 +0100 +@@ -39,6 +39,9 @@ + #include "expr.h" + #include "emit-rtl.h" + #include "explow.h" ++#include "stringpool.h" ++#include "attribs.h" ++ + + /* ------------------------------------------------------------------------ */ + +@@ -261,6 +264,118 @@ + output_asm_insn (pattern, operands); + } + ++static void ++nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p) ++{ ++ rtx src_high_part; ++ rtx dst_high_part, dst_low_part; ++ ++ dst_high_part = nds32_di_high_part_subreg (dst); ++ src_high_part = nds32_di_high_part_subreg (src); ++ dst_low_part = nds32_di_low_part_subreg (dst); ++ ++ if (CONST_INT_P (shiftamount)) ++ { ++ if (INTVAL (shiftamount) < 32) ++ { ++ if (logic_shift_p) ++ { ++ emit_insn (gen_uwext (dst_low_part, src, ++ shiftamount)); ++ emit_insn (gen_lshrsi3 (dst_high_part, src_high_part, ++ shiftamount)); ++ } ++ else ++ { ++ emit_insn (gen_wext (dst_low_part, src, ++ shiftamount)); ++ emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, ++ shiftamount)); ++ } ++ } ++ else ++ { ++ rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); ++ ++ if (logic_shift_p) ++ { ++ emit_insn (gen_lshrsi3 (dst_low_part, src_high_part, ++ new_shift_amout)); ++ emit_move_insn (dst_high_part, const0_rtx); ++ } ++ else ++ { ++ emit_insn (gen_ashrsi3 (dst_low_part, src_high_part, ++ new_shift_amout)); ++ emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, ++ GEN_INT (31))); ++ } ++ } ++ } ++ else ++ { ++ rtx dst_low_part_l32, dst_high_part_l32; ++ rtx dst_low_part_g32, dst_high_part_g32; ++ rtx new_shift_amout, select_reg; ++ dst_low_part_l32 = gen_reg_rtx (SImode); ++ dst_high_part_l32 = gen_reg_rtx (SImode); ++ dst_low_part_g32 = gen_reg_rtx (SImode); ++ dst_high_part_g32 = gen_reg_rtx (SImode); ++ new_shift_amout = gen_reg_rtx (SImode); ++ select_reg = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f))); ++ ++ if (logic_shift_p) ++ { ++ /* ++ if (shiftamount < 32) ++ dst_low_part = wext (src, shiftamount) ++ dst_high_part = src_high_part >> shiftamount ++ else ++ dst_low_part = src_high_part >> (shiftamount & 0x1f) ++ dst_high_part = 0 ++ */ ++ emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount)); ++ emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part, ++ shiftamount)); ++ ++ emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); ++ emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part, ++ new_shift_amout)); ++ emit_move_insn (dst_high_part_g32, const0_rtx); ++ } ++ else ++ { ++ /* ++ if (shiftamount < 32) ++ dst_low_part = wext (src, shiftamount) ++ dst_high_part = src_high_part >> shiftamount ++ else ++ dst_low_part = src_high_part >> (shiftamount & 0x1f) ++ # shift 31 for sign extend ++ dst_high_part = src_high_part >> 31 ++ */ ++ emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); ++ emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part, ++ shiftamount)); ++ ++ emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); ++ emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part, ++ new_shift_amout)); ++ emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part, ++ GEN_INT (31))); ++ } ++ ++ emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); ++ ++ emit_insn (gen_cmovnsi (dst_low_part, select_reg, ++ dst_low_part_l32, dst_low_part_g32)); ++ emit_insn (gen_cmovnsi (dst_high_part, select_reg, ++ dst_high_part_l32, dst_high_part_g32)); ++ } ++} ++ + /* ------------------------------------------------------------------------ */ + + /* Auxiliary function for expand RTL pattern. */ +@@ -1195,8 +1310,166 @@ + } + } + ++enum nds32_expand_result_type ++nds32_expand_extv (rtx *operands) ++{ ++ gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3])); ++ HOST_WIDE_INT width = INTVAL (operands[2]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[3]); ++ rtx dst = operands[0]; ++ rtx src = operands[1]; ++ ++ if (MEM_P (src) ++ && width == 32 ++ && (bitpos % BITS_PER_UNIT) == 0 ++ && GET_MODE_BITSIZE (GET_MODE (dst)) == width) ++ { ++ rtx newmem = adjust_address (src, GET_MODE (dst), ++ bitpos / BITS_PER_UNIT); ++ ++ rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); ++ ++ emit_insn (gen_unaligned_loadsi (dst, base_addr)); ++ ++ return EXPAND_DONE; ++ } ++ return EXPAND_FAIL; ++} ++ ++enum nds32_expand_result_type ++nds32_expand_insv (rtx *operands) ++{ ++ gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2])); ++ HOST_WIDE_INT width = INTVAL (operands[1]); ++ HOST_WIDE_INT bitpos = INTVAL (operands[2]); ++ rtx dst = operands[0]; ++ rtx src = operands[3]; ++ ++ if (MEM_P (dst) ++ && width == 32 ++ && (bitpos % BITS_PER_UNIT) == 0 ++ && GET_MODE_BITSIZE (GET_MODE (src)) == width) ++ { ++ rtx newmem = adjust_address (dst, GET_MODE (src), ++ bitpos / BITS_PER_UNIT); ++ ++ rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); ++ ++ emit_insn (gen_unaligned_storesi (base_addr, src)); ++ ++ return EXPAND_DONE; ++ } ++ return EXPAND_FAIL; ++} ++ + /* ------------------------------------------------------------------------ */ + ++/* Function to generate PC relative jump table. ++ Refer to nds32.md for more details. ++ ++ The following is the sample for the case that diff value ++ can be presented in '.short' size. ++ ++ addi $r1, $r1, -(case_lower_bound) ++ slti $ta, $r1, (case_number) ++ beqz $ta, .L_skip_label ++ ++ la $ta, .L35 ! get jump table address ++ lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry ++ addi $ta, $r1, $ta ++ jr5 $ta ++ ++ ! jump table entry ++ L35: ++ .short .L25-.L35 ++ .short .L26-.L35 ++ .short .L27-.L35 ++ .short .L28-.L35 ++ .short .L29-.L35 ++ .short .L30-.L35 ++ .short .L31-.L35 ++ .short .L32-.L35 ++ .short .L33-.L35 ++ .short .L34-.L35 */ ++const char * ++nds32_output_casesi_pc_relative (rtx *operands) ++{ ++ machine_mode mode; ++ rtx diff_vec; ++ ++ diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1]))); ++ ++ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); ++ ++ /* Step C: "t <-- operands[1]". */ ++ if (flag_pic) ++ { ++ output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); ++ output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); ++ output_asm_insn ("add\t$ta, $ta, $gp", operands); ++ } ++ else ++ output_asm_insn ("la\t$ta, %l1", operands); ++ ++ /* Get the mode of each element in the difference vector. */ ++ mode = GET_MODE (diff_vec); ++ ++ /* Step D: "z <-- (mem (plus (operands[0] << m) t))", ++ where m is 0, 1, or 2 to load address-diff value from table. */ ++ switch (mode) ++ { ++ case E_QImode: ++ output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); ++ break; ++ case E_HImode: ++ output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); ++ break; ++ case E_SImode: ++ output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* Step E: "t <-- z + t". ++ Add table label_ref with address-diff value to ++ obtain target case address. */ ++ output_asm_insn ("add\t$ta, %2, $ta", operands); ++ ++ /* Step F: jump to target with register t. */ ++ if (TARGET_16_BIT) ++ return "jr5\t$ta"; ++ else ++ return "jr\t$ta"; ++} ++ ++/* Function to generate normal jump table. */ ++const char * ++nds32_output_casesi (rtx *operands) ++{ ++ /* Step C: "t <-- operands[1]". */ ++ if (flag_pic) ++ { ++ output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); ++ output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); ++ output_asm_insn ("add\t$ta, $ta, $gp", operands); ++ } ++ else ++ output_asm_insn ("la\t$ta, %l1", operands); ++ ++ /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ ++ output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); ++ ++ /* No need to perform Step E, which is only used for ++ pc relative jump table. */ ++ ++ /* Step F: jump to target with register z. */ ++ if (TARGET_16_BIT) ++ return "jr5\t%2"; ++ else ++ return "jr\t%2"; ++} ++ + /* Function to return memory format. */ + enum nds32_16bit_address_type + nds32_mem_format (rtx op) +@@ -1757,11 +2030,8 @@ + + /* If we step here, we are going to do v3push or multiple push operation. */ + +- /* The v3push/v3pop instruction should only be applied on +- none-isr and none-variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) ++ /* Refer to nds32.h, where we comment when push25/pop25 are available. */ ++ if (NDS32_V3PUSH_AVAILABLE_P) + { + /* For stack v3push: + operands[0]: Re +@@ -1881,11 +2151,8 @@ + + /* If we step here, we are going to do v3pop or multiple pop operation. */ + +- /* The v3push/v3pop instruction should only be applied on +- none-isr and none-variadic function. */ +- if (TARGET_V3PUSH +- && !nds32_isr_function_p (current_function_decl) +- && (cfun->machine->va_args_size == 0)) ++ /* Refer to nds32.h, where we comment when push25/pop25 are available. */ ++ if (NDS32_V3PUSH_AVAILABLE_P) + { + /* For stack v3pop: + operands[0]: Re +@@ -2022,77 +2289,6 @@ + return ""; + } + +-/* Function to generate PC relative jump table. +- Refer to nds32.md for more details. +- +- The following is the sample for the case that diff value +- can be presented in '.short' size. +- +- addi $r1, $r1, -(case_lower_bound) +- slti $ta, $r1, (case_number) +- beqz $ta, .L_skip_label +- +- la $ta, .L35 ! get jump table address +- lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry +- addi $ta, $r1, $ta +- jr5 $ta +- +- ! jump table entry +- L35: +- .short .L25-.L35 +- .short .L26-.L35 +- .short .L27-.L35 +- .short .L28-.L35 +- .short .L29-.L35 +- .short .L30-.L35 +- .short .L31-.L35 +- .short .L32-.L35 +- .short .L33-.L35 +- .short .L34-.L35 */ +-const char * +-nds32_output_casesi_pc_relative (rtx *operands) +-{ +- machine_mode mode; +- rtx diff_vec; +- +- diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1]))); +- +- gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); +- +- /* Step C: "t <-- operands[1]". */ +- output_asm_insn ("la\t$ta, %l1", operands); +- +- /* Get the mode of each element in the difference vector. */ +- mode = GET_MODE (diff_vec); +- +- /* Step D: "z <-- (mem (plus (operands[0] << m) t))", +- where m is 0, 1, or 2 to load address-diff value from table. */ +- switch (mode) +- { +- case E_QImode: +- output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); +- break; +- case E_HImode: +- output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); +- break; +- case E_SImode: +- output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); +- break; +- default: +- gcc_unreachable (); +- } +- +- /* Step E: "t <-- z + t". +- Add table label_ref with address-diff value to +- obtain target case address. */ +- output_asm_insn ("add\t$ta, %2, $ta", operands); +- +- /* Step F: jump to target with register t. */ +- if (TARGET_16_BIT) +- return "jr5\t$ta"; +- else +- return "jr\t$ta"; +-} + + /* output a float load instruction */ + const char * +@@ -2250,52 +2446,51 @@ + return ""; + } + +-/* Function to generate normal jump table. */ + const char * +-nds32_output_casesi (rtx *operands) ++nds32_output_smw_single_word (rtx *operands) + { +- /* Step C: "t <-- operands[1]". */ +- output_asm_insn ("la\t$ta, %l1", operands); +- +- /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ +- output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); +- +- /* No need to perform Step E, which is only used for +- pc relative jump table. */ ++ char buff[100]; ++ unsigned regno; ++ int enable4; ++ bool update_base_p; ++ rtx base_addr = operands[0]; ++ rtx base_reg; ++ rtx otherops[2]; + +- /* Step F: jump to target with register z. */ +- if (TARGET_16_BIT) +- return "jr5\t%2"; ++ if (REG_P (XEXP (base_addr, 0))) ++ { ++ update_base_p = false; ++ base_reg = XEXP (base_addr, 0); ++ } + else +- return "jr\t%2"; +-} ++ { ++ update_base_p = true; ++ base_reg = XEXP (XEXP (base_addr, 0), 0); ++ } + +-/* Auxiliary functions for lwm/smw. */ +-bool +-nds32_valid_smw_lwm_base_p (rtx op) +-{ +- rtx base_addr; ++ const char *update_base = update_base_p ? "m" : ""; + +- if (!MEM_P (op)) +- return false; ++ regno = REGNO (operands[1]); + +- base_addr = XEXP (op, 0); ++ otherops[0] = base_reg; ++ otherops[1] = operands[1]; + +- if (REG_P (base_addr)) +- return true; ++ if (regno >= 28) ++ { ++ enable4 = nds32_regno_to_enable4 (regno); ++ sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); ++ } + else + { +- if (GET_CODE (base_addr) == POST_INC +- && REG_P (XEXP (base_addr, 0))) +- return true; ++ sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); + } +- +- return false; ++ output_asm_insn (buff, otherops); ++ return ""; + } + + /* ------------------------------------------------------------------------ */ + const char * +-nds32_output_smw_single_word (rtx *operands) ++nds32_output_smw_double_word (rtx *operands) + { + char buff[100]; + unsigned regno; +@@ -2303,7 +2498,7 @@ + bool update_base_p; + rtx base_addr = operands[0]; + rtx base_reg; +- rtx otherops[2]; ++ rtx otherops[3]; + + if (REG_P (XEXP (base_addr, 0))) + { +@@ -2322,15 +2517,22 @@ + + otherops[0] = base_reg; + otherops[1] = operands[1]; ++ otherops[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);; + + if (regno >= 28) + { +- enable4 = nds32_regno_to_enable4 (regno); ++ enable4 = nds32_regno_to_enable4 (regno) ++ | nds32_regno_to_enable4 (regno + 1); + sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); + } ++ else if (regno == 27) ++ { ++ enable4 = nds32_regno_to_enable4 (regno + 1); ++ sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1, %x", update_base, enable4); ++ } + else + { +- sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); ++ sprintf (buff, "smw.bi%s\t%%1, [%%0], %%2", update_base); + } + output_asm_insn (buff, otherops); + return ""; +@@ -2415,16 +2617,17 @@ + if (mode == DImode) + { + /* Load doubleword, we need two registers to access. */ +- reg[0] = simplify_gen_subreg (SImode, operands[0], +- GET_MODE (operands[0]), 0); +- reg[1] = simplify_gen_subreg (SImode, operands[0], +- GET_MODE (operands[0]), 4); ++ reg[0] = nds32_di_low_part_subreg (operands[0]); ++ reg[1] = nds32_di_high_part_subreg (operands[0]); + /* A register only store 4 byte. */ + width = GET_MODE_SIZE (SImode) - 1; + } + else + { +- reg[0] = operands[0]; ++ if (VECTOR_MODE_P (mode)) ++ reg[0] = gen_reg_rtx (SImode); ++ else ++ reg[0] = operands[0]; + } + + for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) +@@ -2466,6 +2669,8 @@ + offset = offset + offset_adj; + } + } ++ if (VECTOR_MODE_P (mode)) ++ convert_move (operands[0], reg[0], false); + } + + void +@@ -2499,16 +2704,20 @@ + if (mode == DImode) + { + /* Load doubleword, we need two registers to access. */ +- reg[0] = simplify_gen_subreg (SImode, operands[1], +- GET_MODE (operands[1]), 0); +- reg[1] = simplify_gen_subreg (SImode, operands[1], +- GET_MODE (operands[1]), 4); ++ reg[0] = nds32_di_low_part_subreg (operands[1]); ++ reg[1] = nds32_di_high_part_subreg (operands[1]); + /* A register only store 4 byte. */ + width = GET_MODE_SIZE (SImode) - 1; + } + else + { +- reg[0] = operands[1]; ++ if (VECTOR_MODE_P (mode)) ++ { ++ reg[0] = gen_reg_rtx (SImode); ++ convert_move (reg[0], operands[1], false); ++ } ++ else ++ reg[0] = operands[1]; + } + + for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) +@@ -2765,6 +2974,36 @@ + return ""; + } + ++const char * ++nds32_output_unpkd8 (rtx output, rtx input, ++ rtx high_idx_rtx, rtx low_idx_rtx, ++ bool signed_p) ++{ ++ char pattern[100]; ++ rtx output_operands[2]; ++ HOST_WIDE_INT high_idx, low_idx; ++ high_idx = INTVAL (high_idx_rtx); ++ low_idx = INTVAL (low_idx_rtx); ++ ++ gcc_assert (high_idx >= 0 && high_idx <= 3); ++ gcc_assert (low_idx >= 0 && low_idx <= 3); ++ ++ /* We only have 10, 20, 30 and 31. */ ++ if ((low_idx != 0 || high_idx == 0) && ++ !(low_idx == 1 && high_idx == 3)) ++ return "#"; ++ ++ char sign_char = signed_p ? 's' : 'z'; ++ ++ sprintf (pattern, ++ "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1", ++ sign_char, high_idx, low_idx); ++ output_operands[0] = output; ++ output_operands[1] = input; ++ output_asm_insn (pattern, output_operands); ++ return ""; ++} ++ + /* Return true if SYMBOL_REF X binds locally. */ + + static bool +@@ -2782,22 +3021,15 @@ + char pattern[100]; + bool noreturn_p; + +- if (GET_CODE (symbol) == CONST) +- { +- symbol= XEXP (symbol, 0); +- +- if (GET_CODE (symbol) == PLUS) +- symbol = XEXP (symbol, 0); +- } +- +- gcc_assert (GET_CODE (symbol) == SYMBOL_REF +- || REG_P (symbol)); +- + if (nds32_long_call_p (symbol)) + strcpy (pattern, long_call); + else + strcpy (pattern, call); + ++ if (flag_pic && CONSTANT_P (symbol) ++ && !nds32_symbol_binds_local_p (symbol)) ++ strcat (pattern, "@PLT"); ++ + if (align_p) + strcat (pattern, "\n\t.align 2"); + +@@ -2815,6 +3047,91 @@ + return ""; + } + ++bool ++nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0, ++ rtx in0_idx1, rtx in1_idx1) ++{ ++ /* smds or smdrs. */ ++ if (INTVAL (in0_idx0) == INTVAL (in1_idx0) ++ && INTVAL (in0_idx1) == INTVAL (in1_idx1) ++ && INTVAL (in0_idx0) != INTVAL (in0_idx1)) ++ return false; ++ ++ /* smxds. */ ++ if (INTVAL (in0_idx0) != INTVAL (in0_idx1) ++ && INTVAL (in1_idx0) != INTVAL (in1_idx1)) ++ return false; ++ ++ return true; ++} ++ ++const char * ++nds32_output_sms (rtx in0_idx0, rtx in1_idx0, ++ rtx in0_idx1, rtx in1_idx1) ++{ ++ if (nds32_need_split_sms_p (in0_idx0, in1_idx0, ++ in0_idx1, in1_idx1)) ++ return "#"; ++ /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */ ++ ++ /* smds or smdrs. */ ++ if (INTVAL (in0_idx0) == INTVAL (in1_idx0) ++ && INTVAL (in0_idx1) == INTVAL (in1_idx1) ++ && INTVAL (in0_idx0) != INTVAL (in0_idx1)) ++ { ++ if (INTVAL (in0_idx0) == 0) ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smds\t%0, %1, %2"; ++ else ++ return "smdrs\t%0, %1, %2"; ++ } ++ else ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smdrs\t%0, %1, %2"; ++ else ++ return "smds\t%0, %1, %2"; ++ } ++ } ++ ++ if (INTVAL (in0_idx0) != INTVAL (in0_idx1) ++ && INTVAL (in1_idx0) != INTVAL (in1_idx1)) ++ { ++ if (INTVAL (in0_idx0) == 1) ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smxds\t%0, %2, %1"; ++ else ++ return "smxds\t%0, %1, %2"; ++ } ++ else ++ { ++ if (TARGET_BIG_ENDIAN) ++ return "smxds\t%0, %1, %2"; ++ else ++ return "smxds\t%0, %2, %1"; ++ } ++ } ++ ++ gcc_unreachable (); ++ return ""; ++} ++ ++void ++nds32_split_sms (rtx out, rtx in0, rtx in1, ++ rtx in0_idx0, rtx in1_idx0, ++ rtx in0_idx1, rtx in1_idx1) ++{ ++ rtx result0 = gen_reg_rtx (SImode); ++ rtx result1 = gen_reg_rtx (SImode); ++ emit_insn (gen_mulhisi3v (result0, in0, in1, ++ in0_idx0, in1_idx0)); ++ emit_insn (gen_mulhisi3v (result1, in0, in1, ++ in0_idx1, in1_idx1)); ++ emit_insn (gen_subsi3 (out, result0, result1)); ++} ++ + /* Spilt a doubleword instrucion to two single word instructions. */ + void + nds32_spilt_doubleword (rtx *operands, bool load_p) +@@ -2846,16 +3163,30 @@ + /* generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const 4))) + high_part: (post_modify ((reg) (plus (reg) (const -12))) */ +- low_part[mem] = gen_frame_mem (SImode, +- gen_rtx_POST_MODIFY (Pmode, sub_mem, +- gen_rtx_PLUS (Pmode, +- sub_mem, +- GEN_INT (4)))); +- high_part[mem] = gen_frame_mem (SImode, +- gen_rtx_POST_MODIFY (Pmode, sub_mem, +- gen_rtx_PLUS (Pmode, +- sub_mem, +- GEN_INT (-12)))); ++ low_part[mem] = gen_rtx_MEM (SImode, ++ gen_rtx_POST_MODIFY (Pmode, sub_mem, ++ gen_rtx_PLUS (Pmode, ++ sub_mem, ++ GEN_INT (4)))); ++ high_part[mem] = gen_rtx_MEM (SImode, ++ gen_rtx_POST_MODIFY (Pmode, sub_mem, ++ gen_rtx_PLUS (Pmode, ++ sub_mem, ++ GEN_INT (-12)))); ++ } ++ else if (GET_CODE (sub_mem) == POST_INC) ++ { ++ /* memory format is (post_inc (reg)), ++ so that extract (reg) from the (post_inc (reg)) pattern. */ ++ sub_mem = XEXP (sub_mem, 0); ++ ++ /* generate low_part and high_part memory format: ++ low_part: (post_inc (reg)) ++ high_part: (post_inc (reg)) */ ++ low_part[mem] = gen_rtx_MEM (SImode, ++ gen_rtx_POST_INC (Pmode, sub_mem)); ++ high_part[mem] = gen_rtx_MEM (SImode, ++ gen_rtx_POST_INC (Pmode, sub_mem)); + } + else if (GET_CODE (sub_mem) == POST_MODIFY) + { +@@ -2872,14 +3203,14 @@ + /* Generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const))) + high_part: ((plus (reg) (const 4))) */ +- low_part[mem] = gen_frame_mem (SImode, +- gen_rtx_POST_MODIFY (Pmode, post_mem, +- gen_rtx_PLUS (Pmode, +- post_mem, +- post_val))); +- high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode, +- post_mem, +- 4)); ++ low_part[mem] = gen_rtx_MEM (SImode, ++ gen_rtx_POST_MODIFY (Pmode, post_mem, ++ gen_rtx_PLUS (Pmode, ++ post_mem, ++ post_val))); ++ high_part[mem] = gen_rtx_MEM (SImode, plus_constant (Pmode, ++ post_mem, ++ 4)); + } + else + { +@@ -2924,11 +3255,516 @@ + } + } + ++void ++nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ rtx src_high_part, src_low_part; ++ rtx dst_high_part, dst_low_part; ++ ++ dst_high_part = nds32_di_high_part_subreg (dst); ++ dst_low_part = nds32_di_low_part_subreg (dst); ++ ++ src_high_part = nds32_di_high_part_subreg (src); ++ src_low_part = nds32_di_low_part_subreg (src); ++ ++ /* We need to handle shift more than 32 bit!!!! */ ++ if (CONST_INT_P (shiftamount)) ++ { ++ if (INTVAL (shiftamount) < 32) ++ { ++ rtx ext_start; ++ ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode); ++ ++ emit_insn (gen_wext (dst_high_part, src, ext_start)); ++ emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount)); ++ } ++ else ++ { ++ rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); ++ ++ emit_insn (gen_ashlsi3 (dst_high_part, src_low_part, ++ new_shift_amout)); ++ ++ emit_move_insn (dst_low_part, GEN_INT (0)); ++ } ++ } ++ else ++ { ++ rtx dst_low_part_l32, dst_high_part_l32; ++ rtx dst_low_part_g32, dst_high_part_g32; ++ rtx new_shift_amout, select_reg; ++ dst_low_part_l32 = gen_reg_rtx (SImode); ++ dst_high_part_l32 = gen_reg_rtx (SImode); ++ dst_low_part_g32 = gen_reg_rtx (SImode); ++ dst_high_part_g32 = gen_reg_rtx (SImode); ++ new_shift_amout = gen_reg_rtx (SImode); ++ select_reg = gen_reg_rtx (SImode); ++ ++ rtx ext_start; ++ ext_start = gen_reg_rtx (SImode); ++ ++ /* ++ if (shiftamount < 32) ++ dst_low_part = src_low_part << shiftamout ++ dst_high_part = wext (src, 32 - shiftamount) ++ # wext can't handle wext (src, 32) since it's only take rb[0:4] ++ # for extract. ++ dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part ++ else ++ dst_low_part = 0 ++ dst_high_part = src_low_part << shiftamount & 0x1f ++ */ ++ ++ emit_insn (gen_subsi3 (ext_start, ++ gen_int_mode (32, SImode), ++ shiftamount)); ++ emit_insn (gen_wext (dst_high_part_l32, src, ext_start)); ++ ++ /* Handle for shiftamout == 0. */ ++ emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, ++ src_high_part, dst_high_part_l32)); ++ ++ emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount)); ++ ++ emit_move_insn (dst_low_part_g32, const0_rtx); ++ emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); ++ emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part, ++ new_shift_amout)); ++ ++ emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); ++ ++ emit_insn (gen_cmovnsi (dst_low_part, select_reg, ++ dst_low_part_l32, dst_low_part_g32)); ++ emit_insn (gen_cmovnsi (dst_high_part, select_reg, ++ dst_high_part_l32, dst_high_part_g32)); ++ } ++} ++ ++void ++nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ nds32_split_shiftrtdi3 (dst, src, shiftamount, false); ++} ++ ++void ++nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ nds32_split_shiftrtdi3 (dst, src, shiftamount, true); ++} ++ ++void ++nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount) ++{ ++ rtx dst_low_part_l32, dst_high_part_l32; ++ rtx dst_low_part_g32, dst_high_part_g32; ++ rtx select_reg, low5bit, low5bit_inv, minus32sa; ++ rtx dst_low_part_g32_tmph; ++ rtx dst_low_part_g32_tmpl; ++ rtx dst_high_part_l32_tmph; ++ rtx dst_high_part_l32_tmpl; ++ ++ rtx src_low_part, src_high_part; ++ rtx dst_high_part, dst_low_part; ++ ++ shiftamount = force_reg (SImode, shiftamount); ++ ++ emit_insn (gen_andsi3 (shiftamount, ++ shiftamount, ++ gen_int_mode (0x3f, SImode))); ++ ++ dst_high_part = nds32_di_high_part_subreg (dst); ++ dst_low_part = nds32_di_low_part_subreg (dst); ++ ++ src_high_part = nds32_di_high_part_subreg (src); ++ src_low_part = nds32_di_low_part_subreg (src); ++ ++ dst_low_part_l32 = gen_reg_rtx (SImode); ++ dst_high_part_l32 = gen_reg_rtx (SImode); ++ dst_low_part_g32 = gen_reg_rtx (SImode); ++ dst_high_part_g32 = gen_reg_rtx (SImode); ++ low5bit = gen_reg_rtx (SImode); ++ low5bit_inv = gen_reg_rtx (SImode); ++ minus32sa = gen_reg_rtx (SImode); ++ select_reg = gen_reg_rtx (SImode); ++ ++ dst_low_part_g32_tmph = gen_reg_rtx (SImode); ++ dst_low_part_g32_tmpl = gen_reg_rtx (SImode); ++ ++ dst_high_part_l32_tmph = gen_reg_rtx (SImode); ++ dst_high_part_l32_tmpl = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); ++ ++ /* if shiftamount < 32 ++ dst_low_part = wext(src, shiftamount) ++ else ++ dst_low_part = ((src_high_part >> (shiftamount & 0x1f)) ++ | (src_low_part << (32 - (shiftamount & 0x1f)))) ++ */ ++ emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode))); ++ emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit)); ++ ++ emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); ++ ++ emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit)); ++ emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv)); ++ ++ emit_insn (gen_iorsi3 (dst_low_part_g32, ++ dst_low_part_g32_tmpl, ++ dst_low_part_g32_tmph)); ++ ++ emit_insn (gen_cmovnsi (dst_low_part, select_reg, ++ dst_low_part_l32, dst_low_part_g32)); ++ ++ /* if shiftamount < 32 ++ dst_high_part = ((src_high_part >> shiftamount) ++ | (src_low_part << (32 - shiftamount))) ++ dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part ++ else ++ dst_high_part = wext(src, shiftamount & 0x1f) ++ */ ++ ++ emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount)); ++ ++ emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount)); ++ emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa)); ++ ++ emit_insn (gen_iorsi3 (dst_high_part_l32, ++ dst_high_part_l32_tmpl, ++ dst_high_part_l32_tmph)); ++ ++ emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, ++ src_high_part, dst_high_part_l32)); ++ ++ emit_insn (gen_wext (dst_high_part_g32, src, low5bit)); ++ ++ emit_insn (gen_cmovnsi (dst_high_part, select_reg, ++ dst_high_part_l32, dst_high_part_g32)); ++} ++ ++/* Return true if OP contains a symbol reference. */ ++bool ++symbolic_reference_mentioned_p (rtx op) ++{ ++ const char *fmt; ++ int i; ++ ++ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) ++ return true; ++ ++ fmt = GET_RTX_FORMAT (GET_CODE (op)); ++ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) ++ { ++ if (fmt[i] == 'E') ++ { ++ int j; ++ ++ for (j = XVECLEN (op, i) - 1; j >= 0; j--) ++ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) ++ return true; ++ } ++ ++ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Expand PIC code for @GOTOFF and @GOT. ++ ++ Example for @GOTOFF: ++ ++ la $r0, symbol@GOTOFF ++ -> sethi $ta, hi20(symbol@GOTOFF) ++ ori $ta, $ta, lo12(symbol@GOTOFF) ++ add $r0, $ta, $gp ++ ++ Example for @GOT: ++ ++ la $r0, symbol@GOT ++ -> sethi $ta, hi20(symbol@GOT) ++ ori $ta, $ta, lo12(symbol@GOT) ++ lw $r0, [$ta + $gp] ++*/ ++rtx ++nds32_legitimize_pic_address (rtx x) ++{ ++ rtx addr = x; ++ rtx reg = gen_reg_rtx (Pmode); ++ rtx pat; ++ ++ if (GET_CODE (x) == LABEL_REF ++ || (GET_CODE (x) == SYMBOL_REF ++ && (CONSTANT_POOL_ADDRESS_P (x) ++ || SYMBOL_REF_LOCAL_P (x)))) ++ { ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ emit_insn (gen_lo_sum (reg, reg, addr)); ++ x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); ++ } ++ else if (GET_CODE (x) == SYMBOL_REF) ++ { ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ emit_insn (gen_lo_sum (reg, reg, addr)); ++ ++ x = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, pic_offset_table_rtx, ++ reg)); ++ } ++ else if (GET_CODE (x) == CONST) ++ { ++ /* We don't split constant in expand_pic_move because GOTOFF can combine ++ the addend with the symbol. */ ++ addr = XEXP (x, 0); ++ gcc_assert (GET_CODE (addr) == PLUS); ++ ++ rtx op0 = XEXP (addr, 0); ++ rtx op1 = XEXP (addr, 1); ++ ++ if ((GET_CODE (op0) == LABEL_REF ++ || (GET_CODE (op0) == SYMBOL_REF ++ && (CONSTANT_POOL_ADDRESS_P (op0) ++ || SYMBOL_REF_LOCAL_P (op0)))) ++ && GET_CODE (op1) == CONST_INT) ++ { ++ pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); ++ pat = gen_rtx_PLUS (Pmode, pat, op1); ++ pat = gen_rtx_CONST (Pmode, pat); ++ emit_insn (gen_sethi (reg, pat)); ++ emit_insn (gen_lo_sum (reg, reg, pat)); ++ x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); ++ } ++ else if (GET_CODE (op0) == SYMBOL_REF ++ && GET_CODE (op1) == CONST_INT) ++ { ++ /* This is a constant offset from a @GOT symbol reference. */ ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ emit_insn (gen_lo_sum (reg, reg, addr)); ++ addr = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, ++ pic_offset_table_rtx, ++ reg)); ++ emit_move_insn (reg, addr); ++ if (satisfies_constraint_Is15 (op1)) ++ x = gen_rtx_PLUS (Pmode, reg, op1); ++ else ++ { ++ rtx tmp_reg = gen_reg_rtx (SImode); ++ emit_insn (gen_movsi (tmp_reg, op1)); ++ x = gen_rtx_PLUS (Pmode, reg, tmp_reg); ++ } ++ } ++ else ++ { ++ /* Don't handle this pattern. */ ++ debug_rtx (x); ++ gcc_unreachable (); ++ } ++ } ++ return x; ++} ++ ++void ++nds32_expand_pic_move (rtx *operands) ++{ ++ rtx src; ++ ++ src = nds32_legitimize_pic_address (operands[1]); ++ emit_move_insn (operands[0], src); ++} ++ ++/* Expand ICT symbol. ++ Example for @ICT and ICT model=large: ++ ++ la $r0, symbol@ICT ++ -> sethi $rt, hi20(symbol@ICT) ++ lwi $r0, [$rt + lo12(symbol@ICT)] ++ ++*/ ++rtx ++nds32_legitimize_ict_address (rtx x) ++{ ++ rtx symbol = x; ++ rtx addr = x; ++ rtx reg = gen_reg_rtx (Pmode); ++ gcc_assert (GET_CODE (x) == SYMBOL_REF ++ && nds32_indirect_call_referenced_p (x)); ++ ++ addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, symbol), UNSPEC_ICT); ++ addr = gen_rtx_CONST (SImode, addr); ++ emit_insn (gen_sethi (reg, addr)); ++ ++ x = gen_const_mem (SImode, gen_rtx_LO_SUM (Pmode, reg, addr)); ++ ++ return x; ++} ++ ++void ++nds32_expand_ict_move (rtx *operands) ++{ ++ rtx src = operands[1]; ++ ++ src = nds32_legitimize_ict_address (src); ++ ++ emit_move_insn (operands[0], src); ++} ++ ++/* Return true X is a indirect call symbol. */ ++bool ++nds32_indirect_call_referenced_p (rtx x) ++{ ++ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_ICT) ++ x = XVECEXP (x, 0, 0); ++ ++ if (GET_CODE (x) == SYMBOL_REF) ++ { ++ tree decl = SYMBOL_REF_DECL (x); ++ ++ return decl ++ && (lookup_attribute("indirect_call", ++ DECL_ATTRIBUTES(decl)) ++ != NULL); ++ } ++ ++ return false; ++} ++ + /* Return true X is need use long call. */ + bool + nds32_long_call_p (rtx symbol) + { +- return TARGET_CMODEL_LARGE; ++ if (nds32_indirect_call_referenced_p (symbol)) ++ return TARGET_ICT_MODEL_LARGE; ++ else ++ return TARGET_CMODEL_LARGE; ++} ++ ++/* Return true if X contains a thread-local symbol. */ ++bool ++nds32_tls_referenced_p (rtx x) ++{ ++ if (!targetm.have_tls) ++ return false; ++ ++ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) ++ x = XEXP (XEXP (x, 0), 0); ++ ++ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) ++ return true; ++ ++ return false; ++} ++ ++/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute ++ this (thread-local) address. */ ++rtx ++nds32_legitimize_tls_address (rtx x) ++{ ++ rtx tmp_reg; ++ rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM); ++ rtx pat, insns, reg0; ++ ++ if (GET_CODE (x) == SYMBOL_REF) ++ switch (SYMBOL_REF_TLS_MODEL (x)) ++ { ++ case TLS_MODEL_GLOBAL_DYNAMIC: ++ case TLS_MODEL_LOCAL_DYNAMIC: ++ /* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill ++ may destroy the define-use chain anylysis to insert relax_hint. */ ++ if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC) ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD); ++ else ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD); ++ ++ pat = gen_rtx_CONST (SImode, pat); ++ reg0 = gen_rtx_REG (Pmode, 0); ++ /* If we can confirm all clobber reigsters, it doesn't have to use call ++ instruction. */ ++ insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0))); ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx); ++ RTL_CONST_CALL_P (insns) = 1; ++ tmp_reg = gen_reg_rtx (SImode); ++ emit_move_insn (tmp_reg, reg0); ++ x = tmp_reg; ++ break; ++ ++ case TLS_MODEL_INITIAL_EXEC: ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE); ++ tmp_reg = gen_reg_rtx (SImode); ++ pat = gen_rtx_CONST (SImode, pat); ++ emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0))); ++ if (flag_pic) ++ emit_use (pic_offset_table_rtx); ++ x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); ++ break; ++ ++ case TLS_MODEL_LOCAL_EXEC: ++ /* Expand symbol_ref@TPOFF': ++ sethi $ta, hi20(symbol_ref@TPOFF) ++ ori $ta, $ta, lo12(symbol_ref@TPOFF) ++ add $r0, $ta, $tp */ ++ tmp_reg = gen_reg_rtx (SImode); ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE); ++ pat = gen_rtx_CONST (SImode, pat); ++ emit_insn (gen_sethi (tmp_reg, pat)); ++ emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); ++ x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ else if (GET_CODE (x) == CONST) ++ { ++ rtx base, addend; ++ split_const (x, &base, &addend); ++ ++ if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) ++ { ++ /* Expand symbol_ref@TPOFF': ++ sethi $ta, hi20(symbol_ref@TPOFF + addend) ++ ori $ta, $ta, lo12(symbol_ref@TPOFF + addend) ++ add $r0, $ta, $tp */ ++ tmp_reg = gen_reg_rtx (SImode); ++ pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, base), UNSPEC_TLSLE); ++ pat = gen_rtx_PLUS (SImode, pat, addend); ++ pat = gen_rtx_CONST (SImode, pat); ++ emit_insn (gen_sethi (tmp_reg, pat)); ++ emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); ++ x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); ++ } ++ } ++ ++ return x; ++} ++ ++void ++nds32_expand_tls_move (rtx *operands) ++{ ++ rtx src = operands[1]; ++ rtx base, addend; ++ ++ if (CONSTANT_P (src)) ++ split_const (src, &base, &addend); ++ ++ if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) ++ src = nds32_legitimize_tls_address (src); ++ else ++ { ++ src = nds32_legitimize_tls_address (base); ++ if (addend != const0_rtx) ++ { ++ src = gen_rtx_PLUS (SImode, src, addend); ++ src = force_operand (src, operands[0]); ++ } ++ } ++ ++ emit_move_insn (operands[0], src); + } + + void +@@ -2976,3 +3812,105 @@ + emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp)); + } + } ++ ++/* Auxiliary functions for lwm/smw. */ ++bool ++nds32_valid_smw_lwm_base_p (rtx op) ++{ ++ rtx base_addr; ++ ++ if (!MEM_P (op)) ++ return false; ++ ++ base_addr = XEXP (op, 0); ++ ++ if (REG_P (base_addr)) ++ return true; ++ else ++ { ++ if (GET_CODE (base_addr) == POST_INC ++ && REG_P (XEXP (base_addr, 0))) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Auxiliary functions for manipulation DI mode. */ ++rtx nds32_di_high_part_subreg(rtx reg) ++{ ++ unsigned high_part_offset = subreg_highpart_offset (SImode, DImode); ++ ++ return simplify_gen_subreg ( ++ SImode, reg, ++ DImode, high_part_offset); ++} ++ ++rtx nds32_di_low_part_subreg(rtx reg) ++{ ++ unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode); ++ ++ return simplify_gen_subreg ( ++ SImode, reg, ++ DImode, low_part_offset); ++} ++ ++/* ------------------------------------------------------------------------ */ ++ ++/* Auxiliary function for output TLS patterns. */ ++ ++const char * ++nds32_output_tls_desc (rtx *operands) ++{ ++ char pattern[1000]; ++ ++ if (TARGET_RELAX_HINT) ++ snprintf (pattern, sizeof (pattern), ++ ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t" ++ ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t" ++ ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t" ++ ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t" ++ ".relax_hint %%1\n\tjral $r15"); ++ else ++ snprintf (pattern, sizeof (pattern), ++ "sethi $r0, hi20(%%0)\n\t" ++ "ori $r0, $r0, lo12(%%0)\n\t" ++ "lw $r15, [$r0 + $gp]\n\t" ++ "add $r0, $r0, $gp\n\t" ++ "jral $r15"); ++ output_asm_insn (pattern, operands); ++ return ""; ++} ++ ++const char * ++nds32_output_tls_ie (rtx *operands) ++{ ++ char pattern[1000]; ++ ++ if (flag_pic) ++ { ++ if (TARGET_RELAX_HINT) ++ snprintf (pattern, sizeof (pattern), ++ ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" ++ ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t" ++ ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]"); ++ else ++ snprintf (pattern, sizeof (pattern), ++ "sethi %%0, hi20(%%1)\n\t" ++ "ori %%0, %%0, lo12(%%1)\n\t" ++ "lw %%0, [%%0 + $gp]"); ++ } ++ else ++ { ++ if (TARGET_RELAX_HINT) ++ snprintf (pattern, sizeof (pattern), ++ ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" ++ ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]"); ++ else ++ snprintf (pattern, sizeof (pattern), ++ "sethi %%0, hi20(%%1)\n\t" ++ "lwi %%0, [%%0 + lo12(%%1)]"); ++ } ++ output_asm_insn (pattern, operands); ++ return ""; ++} +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-memory-manipulation.c gcc-8.2.0/gcc/config/nds32/nds32-memory-manipulation.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-memory-manipulation.c 2018-03-11 09:24:33.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-memory-manipulation.c 2019-01-25 15:38:32.829242659 +0100 +@@ -257,8 +257,124 @@ + nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, + rtx size, rtx alignment) + { +- return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, +- size, alignment); ++ rtx dst_base_reg, src_base_reg; ++ rtx dst_itr, src_itr; ++ rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; ++ rtx dst_end; ++ rtx double_word_mode_loop, byte_mode_loop; ++ rtx tmp; ++ int start_regno; ++ bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; ++ unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); ++ ++ if (TARGET_ISA_V3M && !align_to_4_bytes) ++ return 0; ++ ++ if (TARGET_REDUCED_REGS) ++ start_regno = 2; ++ else ++ start_regno = 16; ++ ++ dst_itr = gen_reg_rtx (Pmode); ++ src_itr = gen_reg_rtx (Pmode); ++ dst_end = gen_reg_rtx (Pmode); ++ tmp = gen_reg_rtx (QImode); ++ ++ double_word_mode_loop = gen_label_rtx (); ++ byte_mode_loop = gen_label_rtx (); ++ ++ dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); ++ src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); ++ ++ if (total_bytes < 8) ++ { ++ /* Emit total_bytes less than 8 loop version of movmem. ++ add $dst_end, $dst, $size ++ move $dst_itr, $dst ++ .Lbyte_mode_loop: ++ lbi.bi $tmp, [$src_itr], #1 ++ sbi.bi $tmp, [$dst_itr], #1 ++ ! Not readch upper bound. Loop. ++ bne $dst_itr, $dst_end, .Lbyte_mode_loop */ ++ ++ /* add $dst_end, $dst, $size */ ++ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ /* move $dst_itr, $dst ++ move $src_itr, $src */ ++ emit_move_insn (dst_itr, dst_base_reg); ++ emit_move_insn (src_itr, src_base_reg); ++ ++ /* .Lbyte_mode_loop: */ ++ emit_label (byte_mode_loop); ++ ++ /* lbi.bi $tmp, [$src_itr], #1 */ ++ nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); ++ ++ /* sbi.bi $tmp, [$dst_itr], #1 */ ++ nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); ++ /* ! Not readch upper bound. Loop. ++ bne $dst_itr, $dst_end, .Lbyte_mode_loop */ ++ emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, ++ SImode, 1, byte_mode_loop); ++ return true; ++ } ++ else if (total_bytes % 8 == 0) ++ { ++ /* Emit multiple of 8 loop version of movmem. ++ ++ add $dst_end, $dst, $size ++ move $dst_itr, $dst ++ move $src_itr, $src ++ ++ .Ldouble_word_mode_loop: ++ lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr ++ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr ++ ! move will delete after register allocation ++ move $src_itr, $src_itr' ++ move $dst_itr, $dst_itr' ++ ! Not readch upper bound. Loop. ++ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ ++ ++ /* add $dst_end, $dst, $size */ ++ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ /* move $dst_itr, $dst ++ move $src_itr, $src */ ++ emit_move_insn (dst_itr, dst_base_reg); ++ emit_move_insn (src_itr, src_base_reg); ++ ++ /* .Ldouble_word_mode_loop: */ ++ emit_label (double_word_mode_loop); ++ /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr ++ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ ++ src_itr_m = src_itr; ++ dst_itr_m = dst_itr; ++ srcmem_m = srcmem; ++ dstmem_m = dstmem; ++ nds32_emit_mem_move_block (start_regno, 2, ++ &dst_itr_m, &dstmem_m, ++ &src_itr_m, &srcmem_m, ++ true); ++ /* move $src_itr, $src_itr' ++ move $dst_itr, $dst_itr' */ ++ emit_move_insn (dst_itr, dst_itr_m); ++ emit_move_insn (src_itr, src_itr_m); ++ ++ /* ! Not readch upper bound. Loop. ++ bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ ++ emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, ++ Pmode, 1, double_word_mode_loop); ++ } ++ else ++ { ++ /* Handle size greater than 8, and not a multiple of 8. */ ++ return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, ++ size, alignment); ++ } ++ ++ return true; + } + + static bool +@@ -433,10 +549,8 @@ + /* Auxiliary function for expand setmem pattern. */ + + static rtx +-nds32_gen_dup_4_byte_to_word_value (rtx value) ++nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) + { +- rtx value4word = gen_reg_rtx (SImode); +- + gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); + + if (CONST_INT_P (value)) +@@ -449,36 +563,74 @@ + } + else + { +- /* ! prepare word +- andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab +- slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 +- or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab +- slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 +- or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ +- +- rtx tmp1, tmp2, tmp3, tmp4, final_value; +- tmp1 = expand_binop (SImode, and_optab, value, +- gen_int_mode (0xff, SImode), +- NULL_RTX, 0, OPTAB_WIDEN); +- tmp2 = expand_binop (SImode, ashl_optab, tmp1, +- gen_int_mode (8, SImode), +- NULL_RTX, 0, OPTAB_WIDEN); +- tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, +- NULL_RTX, 0, OPTAB_WIDEN); +- tmp4 = expand_binop (SImode, ashl_optab, tmp3, +- gen_int_mode (16, SImode), +- NULL_RTX, 0, OPTAB_WIDEN); +- +- final_value = expand_binop (SImode, ior_optab, tmp3, tmp4, +- NULL_RTX, 0, OPTAB_WIDEN); +- emit_move_insn (value4word, final_value); ++ if (NDS32_EXT_DSP_P ()) ++ { ++ /* ! prepare word ++ insb $tmp, $value, 1 ! $tmp <- 0x0000abab ++ pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ ++ rtx tmp = gen_reg_rtx (SImode); ++ ++ convert_move (tmp, value, true); ++ ++ emit_insn ( ++ gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); ++ ++ emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); ++ } ++ else ++ { ++ /* ! prepare word ++ andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab ++ slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 ++ or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab ++ slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 ++ or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ ++ ++ rtx tmp1, tmp2, tmp3, tmp4; ++ tmp1 = expand_binop (SImode, and_optab, value, ++ gen_int_mode (0xff, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ tmp2 = expand_binop (SImode, ashl_optab, tmp1, ++ gen_int_mode (8, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ tmp4 = expand_binop (SImode, ashl_optab, tmp3, ++ gen_int_mode (16, SImode), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_insn (gen_iorsi3 (value4word, tmp3, tmp4)); ++ } + } + + return value4word; + } + + static rtx +-emit_setmem_word_loop (rtx itr, rtx size, rtx value) ++nds32_gen_dup_4_byte_to_word_value (rtx value) ++{ ++ rtx value4word = gen_reg_rtx (SImode); ++ nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); ++ ++ return value4word; ++} ++ ++static rtx ++nds32_gen_dup_8_byte_to_double_word_value (rtx value) ++{ ++ rtx value4doubleword = gen_reg_rtx (DImode); ++ ++ nds32_gen_dup_4_byte_to_word_value_aux ( ++ value, nds32_di_low_part_subreg(value4doubleword)); ++ ++ emit_move_insn (nds32_di_high_part_subreg(value4doubleword), ++ nds32_di_low_part_subreg(value4doubleword)); ++ return value4doubleword; ++} ++ ++ ++static rtx ++emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) + { + rtx word_mode_label = gen_label_rtx (); + rtx word_mode_end_label = gen_label_rtx (); +@@ -487,9 +639,9 @@ + rtx word_mode_end = gen_reg_rtx (SImode); + rtx size_for_word = gen_reg_rtx (SImode); + +- /* and $size_for_word, $size, #~3 */ ++ /* and $size_for_word, $size, #~0x7 */ + size_for_word = expand_binop (SImode, and_optab, size, +- gen_int_mode (~3, SImode), ++ gen_int_mode (~0x7, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_move_insn (byte_mode_size, size); +@@ -501,8 +653,8 @@ + word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, + NULL_RTX, 0, OPTAB_WIDEN); + +- /* andi $byte_mode_size, $size, 3 */ +- byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3), ++ /* andi $byte_mode_size, $size, 0x7 */ ++ byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_move_insn (byte_mode_size, byte_mode_size_tmp); +@@ -512,9 +664,9 @@ + /* ! word-mode set loop + smw.bim $value4word, [$dst_itr], $value4word, 0 + bne $word_mode_end, $dst_itr, .Lword_mode */ +- emit_insn (gen_unaligned_store_update_base_w (itr, +- itr, +- value)); ++ emit_insn (gen_unaligned_store_update_base_dw (itr, ++ itr, ++ value)); + emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, + Pmode, 1, word_mode_label); + +@@ -566,7 +718,7 @@ + static bool + nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) + { +- rtx value4word; ++ rtx value4doubleword; + rtx value4byte; + rtx dst; + rtx byte_mode_size; +@@ -609,7 +761,7 @@ + or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab + slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 + or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ +- value4word = nds32_gen_dup_4_byte_to_word_value (value); ++ value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); + + /* and $size_for_word, $size, #-4 + beqz $size_for_word, .Lword_mode_end +@@ -622,7 +774,7 @@ + smw.bim $value4word, [$dst], $value4word, 0 + bne $word_mode_end, $dst, .Lword_mode + .Lword_mode_end: */ +- byte_mode_size = emit_setmem_word_loop (dst, size, value4word); ++ byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); + + /* beqz $byte_mode_size, .Lend + add $byte_mode_end, $dst, $byte_mode_size +@@ -633,8 +785,8 @@ + bne $byte_mode_end, $dst, .Lbyte_mode + .Lend: */ + +- value4byte = simplify_gen_subreg (QImode, value4word, SImode, +- subreg_lowpart_offset (QImode, SImode)); ++ value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, ++ subreg_lowpart_offset (QImode, DImode)); + + emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); + +@@ -651,14 +803,15 @@ + rtx byte_loop_size = gen_reg_rtx (SImode); + rtx remain_size = gen_reg_rtx (SImode); + rtx new_base_reg; +- rtx value4byte, value4word; ++ rtx value4byte, value4doubleword; + rtx byte_mode_size; + rtx last_byte_loop_label = gen_label_rtx (); + + size = force_reg (SImode, size); + +- value4word = nds32_gen_dup_4_byte_to_word_value (value); +- value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0); ++ value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); ++ value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, ++ subreg_lowpart_offset (QImode, DImode)); + + emit_move_insn (byte_loop_size, size); + emit_move_insn (byte_loop_base, base_reg); +@@ -686,9 +839,9 @@ + emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); + + /* Set memory word by word. */ +- byte_mode_size = emit_setmem_word_loop (new_base_reg, +- remain_size, +- value4word); ++ byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, ++ remain_size, ++ value4doubleword); + + emit_move_insn (byte_loop_base, new_base_reg); + emit_move_insn (byte_loop_size, byte_mode_size); +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-multiple.md gcc-8.2.0/gcc/config/nds32/nds32-multiple.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-multiple.md 2018-03-11 09:24:33.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-multiple.md 2019-01-25 15:38:32.829242659 +0100 +@@ -2854,6 +2854,25 @@ + (set_attr "length" "4")] + ) + ++(define_expand "unaligned_store_update_base_dw" ++ [(parallel [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8))) ++ (set (mem:DI (match_dup 1)) ++ (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])] ++ "" ++{ ++ /* DO NOT emit unaligned_store_w_m immediately since web pass don't ++ recognize post_inc, try it again after GCC 5.0. ++ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ ++ emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2])); ++ emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode))); ++ DONE; ++} ++ [(set_attr "type" "store_multiple") ++ (set_attr "combo" "2") ++ (set_attr "length" "4")] ++) ++ + (define_insn "*stmsi25" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-n10.md gcc-8.2.0/gcc/config/nds32/nds32-n10.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-n10.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-n10.md 2019-01-25 15:38:32.829242659 +0100 +@@ -0,0 +1,439 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2018 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N10 pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n10_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; IF - Instruction Fetch ++;; II - Instruction Issue / Instruction Decode ++;; EX - Instruction Execution ++;; MM - Memory Execution ++;; WB - Instruction Retire / Result Write-Back ++ ++(define_cpu_unit "n10_ii" "nds32_n10_machine") ++(define_cpu_unit "n10_ex" "nds32_n10_machine") ++(define_cpu_unit "n10_mm" "nds32_n10_machine") ++(define_cpu_unit "n10_wb" "nds32_n10_machine") ++(define_cpu_unit "n10f_iq" "nds32_n10_machine") ++(define_cpu_unit "n10f_rf" "nds32_n10_machine") ++(define_cpu_unit "n10f_e1" "nds32_n10_machine") ++(define_cpu_unit "n10f_e2" "nds32_n10_machine") ++(define_cpu_unit "n10f_e3" "nds32_n10_machine") ++(define_cpu_unit "n10f_e4" "nds32_n10_machine") ++ ++(define_insn_reservation "nds_n10_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex*3, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_pbsada" 1 ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex*3, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_1" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1"))) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_2" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)"))) ++ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_3" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_4" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_5" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_6" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_7" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_load_multiple_N" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "load_multiple") ++ (match_test "get_attr_combo (insn) >= 8"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_1" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1"))) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_2" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)"))) ++ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_3" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_4" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_5" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_6" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_7" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_store_multiple_N" 1 ++ (and (eq_attr "pipeline_model" "n10") ++ (and (eq_attr "type" "store_multiple") ++ (match_test "get_attr_combo (insn) >= 8"))) ++ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") ++ ++(define_insn_reservation "nds_n10_mul" 1 ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_mac" 1 ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex*34, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_alu" 1 ++ (and (eq_attr "type" "dalu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_alu64" 1 ++ (and (eq_attr "type" "dalu64") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_alu_round" 1 ++ (and (eq_attr "type" "daluround") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_cmp" 1 ++ (and (eq_attr "type" "dcmp") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_clip" 1 ++ (and (eq_attr "type" "dclip") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_mul" 1 ++ (and (eq_attr "type" "dmul") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_mac" 1 ++ (and (eq_attr "type" "dmac") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_insb" 1 ++ (and (eq_attr "type" "dinsb") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_pack" 1 ++ (and (eq_attr "type" "dpack") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_bpick" 1 ++ (and (eq_attr "type" "dbpick") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_dsp_wext" 1 ++ (and (eq_attr "type" "dwext") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ex, n10_mm, n10_wb") ++ ++(define_insn_reservation "nds_n10_fpu_alu" 4 ++ (and (eq_attr "type" "falu") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_muls" 4 ++ (and (eq_attr "type" "fmuls") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_muld" 4 ++ (and (eq_attr "type" "fmuld") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_macs" 4 ++ (and (eq_attr "type" "fmacs") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_macd" 4 ++ (and (eq_attr "type" "fmacd") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*4, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_divs" 4 ++ (and (ior (eq_attr "type" "fdivs") ++ (eq_attr "type" "fsqrts")) ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_divd" 4 ++ (and (ior (eq_attr "type" "fdivd") ++ (eq_attr "type" "fsqrtd")) ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fast_alu" 2 ++ (and (ior (eq_attr "type" "fcmp") ++ (ior (eq_attr "type" "fabs") ++ (ior (eq_attr "type" "fcpy") ++ (eq_attr "type" "fcmov")))) ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmtsr" 4 ++ (and (eq_attr "type" "fmtsr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmtdr" 4 ++ (and (eq_attr "type" "fmtdr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmfsr" 2 ++ (and (eq_attr "type" "fmfsr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_fmfdr" 2 ++ (and (eq_attr "type" "fmfdr") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_load" 3 ++ (and (eq_attr "type" "fload") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++(define_insn_reservation "nds_n10_fpu_store" 1 ++ (and (eq_attr "type" "fstore") ++ (eq_attr "pipeline_model" "n10")) ++ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD ++;; Load data from the memory and produce the loaded data. The result is ++;; ready at MM. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at MM. ++;; MUL, MAC ++;; Compute data in the multiply-adder and produce the data. The result ++;; is ready at MM. ++;; DIV ++;; Compute data in the divider and produce the data. The result is ready ++;; at MM. ++;; ++;; Consumers (RHS) ++;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU ++;; Require operands at EX. ++;; ALU_SHIFT_Rb ++;; An ALU-SHIFT instruction consists of a shift micro-operation followed ++;; by an arithmetic micro-operation. The operand Rb is used by the first ++;; micro-operation, and there are some latencies if data dependency occurs. ++;; MAC_RaRb ++;; A MAC instruction does multiplication at EX and does accumulation at MM, ++;; so the operand Rt is required at MM, and operands Ra and Rb are required ++;; at EX. ++;; ADDR_IN ++;; If an instruction requires an address as its input operand, the address ++;; is required at EX. ++;; ST ++;; A store instruction requires its data at MM. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at MM. ++;; BR ++;; If a branch instruction is conditional, its input data is required at EX. ++ ++;; FPU_ADDR_OUT -> FPU_ADDR_IN ++;; Main pipeline rules don't need this because those default latency is 1. ++(define_bypass 1 ++ "nds_n10_fpu_load, nds_n10_fpu_store" ++ "nds_n10_fpu_load, nds_n10_fpu_store" ++ "nds32_n10_ex_to_ex_p" ++) ++ ++;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\ ++ nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\ ++ nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext" ++ "nds_n10_alu, nds_n10_alu_shift,\ ++ nds_n10_pbsad, nds_n10_pbsada,\ ++ nds_n10_mul, nds_n10_mac, nds_n10_div,\ ++ nds_n10_branch,\ ++ nds_n10_load, nds_n10_store,\ ++ nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ ++ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ ++ nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ ++ nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ ++ nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ ++ nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ ++ nds_n10_mmu,\ ++ nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ ++ nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ ++ nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ ++ nds_n10_dsp_wext, nds_n10_dsp_bpick" ++ "nds32_n10_mm_to_ex_p" ++) ++ ++;; LMW(N, N) ++;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU ++;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb ++(define_bypass 2 ++ "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ ++ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ ++ nds_n10_load_multiple_7, nds_n10_load_multiple_N" ++ "nds_n10_alu, nds_n10_alu_shift,\ ++ nds_n10_pbsad, nds_n10_pbsada,\ ++ nds_n10_mul, nds_n10_mac, nds_n10_div,\ ++ nds_n10_branch,\ ++ nds_n10_load, nds_n10_store,\ ++ nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ ++ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ ++ nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ ++ nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ ++ nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ ++ nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ ++ nds_n10_mmu,\ ++ nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ ++ nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ ++ nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ ++ nds_n10_dsp_wext, nds_n10_dsp_bpick" ++ "nds32_n10_last_load_to_ex_p" ++) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-n13.md gcc-8.2.0/gcc/config/nds32/nds32-n13.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-n13.md 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-n13.md 2019-01-25 15:38:32.829242659 +0100 +@@ -0,0 +1,401 @@ ++;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler ++;; Copyright (C) 2012-2018 Free Software Foundation, Inc. ++;; Contributed by Andes Technology Corporation. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published ++;; by the Free Software Foundation; either version 3, or (at your ++;; option) any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but WITHOUT ++;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++;; License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++ ++;; ------------------------------------------------------------------------ ++;; Define N13 pipeline settings. ++;; ------------------------------------------------------------------------ ++ ++(define_automaton "nds32_n13_machine") ++ ++;; ------------------------------------------------------------------------ ++;; Pipeline Stages ++;; ------------------------------------------------------------------------ ++;; F1 - Instruction Fetch First ++;; Instruction Tag/Data Arrays ++;; ITLB Address Translation ++;; Branch Target Buffer Prediction ++;; F2 - Instruction Fetch Second ++;; Instruction Cache Hit Detection ++;; Cache Way Selection ++;; Inustruction Alignment ++;; I1 - Instruction Issue First / Instruction Decode ++;; Instruction Cache Replay Triggering ++;; 32/16-Bit Instruction Decode ++;; Return Address Stack Prediction ++;; I2 - Instruction Issue Second / Register File Access ++;; Instruction Issue Logic ++;; Register File Access ++;; E1 - Instruction Execute First / Address Generation / MAC First ++;; Data Access Address generation ++;; Multiply Operation ++;; E2 - Instruction Execute Second / Data Access First / MAC Second / ++;; ALU Execute ++;; Skewed ALU ++;; Branch/Jump/Return Resolution ++;; Data Tag/Data arrays ++;; DTLB address translation ++;; Accumulation Operation ++;; E3 - Instruction Execute Third / Data Access Second ++;; Data Cache Hit Detection ++;; Cache Way Selection ++;; Data Alignment ++;; E4 - Instruction Execute Fourth / Write Back ++;; Interruption Resolution ++;; Instruction Retire ++;; Register File Write Back ++ ++(define_cpu_unit "n13_i1" "nds32_n13_machine") ++(define_cpu_unit "n13_i2" "nds32_n13_machine") ++(define_cpu_unit "n13_e1" "nds32_n13_machine") ++(define_cpu_unit "n13_e2" "nds32_n13_machine") ++(define_cpu_unit "n13_e3" "nds32_n13_machine") ++(define_cpu_unit "n13_e4" "nds32_n13_machine") ++ ++(define_insn_reservation "nds_n13_unknown" 1 ++ (and (eq_attr "type" "unknown") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_misc" 1 ++ (and (eq_attr "type" "misc") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_mmu" 1 ++ (and (eq_attr "type" "mmu") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_alu" 1 ++ (and (eq_attr "type" "alu") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_alu_shift" 1 ++ (and (eq_attr "type" "alu_shift") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_pbsad" 1 ++ (and (eq_attr "type" "pbsad") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_pbsada" 1 ++ (and (eq_attr "type" "pbsada") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_load" 1 ++ (and (match_test "nds32::load_single_p (insn)") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_store" 1 ++ (and (match_test "nds32::store_single_p (insn)") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_1" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::load_double_p (insn)")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_3" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_4" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_5" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_6" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_7" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_8" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_load_multiple_12" 1 ++ (and (and (eq_attr "type" "load_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_1" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "1")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_2" 1 ++ (and (ior (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "2")) ++ (match_test "nds32::store_double_p (insn)")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_3" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "3")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_4" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "4")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_5" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "5")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_6" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "6")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_7" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "7")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_8" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "8")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++(define_insn_reservation "nds_n13_store_multiple_12" 1 ++ (and (and (eq_attr "type" "store_multiple") ++ (eq_attr "combo" "12")) ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") ++ ++;; The multiplier at E1 takes two cycles. ++(define_insn_reservation "nds_n13_mul" 1 ++ (and (eq_attr "type" "mul") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_mac" 1 ++ (and (eq_attr "type" "mac") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") ++ ++;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2, ++;; so the worst case is 34. ++(define_insn_reservation "nds_n13_div" 1 ++ (and (eq_attr "type" "div") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4") ++ ++(define_insn_reservation "nds_n13_branch" 1 ++ (and (eq_attr "type" "branch") ++ (eq_attr "pipeline_model" "n13")) ++ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") ++ ++;; ------------------------------------------------------------------------ ++;; Comment Notations and Bypass Rules ++;; ------------------------------------------------------------------------ ++;; Producers (LHS) ++;; LD ++;; Load data from the memory and produce the loaded data. The result is ++;; ready at E3. ++;; LMW(N, M) ++;; There are N micro-operations within an instruction that loads multiple ++;; words. The result produced by the M-th micro-operation is sent to ++;; consumers. The result is ready at E3. ++;; ADDR_OUT ++;; Most load/store instructions can produce an address output if updating ++;; the base register is required. The result is ready at E2, which is ++;; produced by ALU. ++;; ALU, ALU_SHIFT, SIMD ++;; Compute data in ALU and produce the data. The result is ready at E2. ++;; MUL, MAC ++;; Compute data in the multiply-adder and produce the data. The result ++;; is ready at E2. ++;; DIV ++;; Compute data in the divider and produce the data. The result is ready ++;; at E2. ++;; BR ++;; Branch-with-link instructions produces a result containing the return ++;; address. The result is ready at E2. ++;; ++;; Consumers (RHS) ++;; ALU ++;; General ALU instructions require operands at E2. ++;; ALU_E1 ++;; Some special ALU instructions, such as BSE, BSP and MOVD44, require ++;; operand at E1. ++;; MUL, DIV, PBSAD, MMU ++;; Operands are required at E1. ++;; PBSADA_Rt, PBSADA_RaRb ++;; Operands Ra and Rb are required at E1, and the operand Rt is required ++;; at E2. ++;; ALU_SHIFT_Rb ++;; An ALU-SHIFT instruction consists of a shift micro-operation followed ++;; by an arithmetic micro-operation. The operand Rb is used by the first ++;; micro-operation, and there are some latencies if data dependency occurs. ++;; MAC_RaRb ++;; A MAC instruction does multiplication at E1 and does accumulation at E2, ++;; so the operand Rt is required at E2, and operands Ra and Rb are required ++;; at E1. ++;; ADDR_IN ++;; If an instruction requires an address as its input operand, the address ++;; is required at E1. ++;; ST ++;; A store instruction requires its data at E2. ++;; SMW(N, M) ++;; There are N micro-operations within an instruction that stores multiple ++;; words. Each M-th micro-operation requires its data at E2. ++;; BR ++;; If a branch instruction is conditional, its input data is required at E2. ++ ++;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 3 ++ "nds_n13_load" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_load_to_e1_p" ++) ++ ++;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_n13_load" ++ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_load_to_e2_p" ++) ++ ++;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 3 ++ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_last_load_to_e1_p") ++ ++;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) ++(define_bypass 2 ++ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" ++ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_last_load_to_e2_p" ++) ++ ++;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 2 ++ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_last_two_load_to_e1_p") ++ ++;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT ++;; -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN ++(define_bypass 2 ++ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ ++ nds_n13_mul, nds_n13_mac, nds_n13_div,\ ++ nds_n13_mmu,\ ++ nds_n13_load, nds_n13_store,\ ++ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ ++ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ ++ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ ++ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ ++ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ ++ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" ++ "nds32_n13_e2_to_e1_p") +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32.opt gcc-8.2.0/gcc/config/nds32/nds32.opt +--- gcc-8.2.0.orig/gcc/config/nds32/nds32.opt 2018-04-22 12:10:00.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32.opt 2019-01-25 15:38:32.833242671 +0100 +@@ -32,6 +32,13 @@ + Target RejectNegative Alias(mlittle-endian) + Generate code in little-endian mode. + ++mfp-as-gp ++Target RejectNegative Alias(mforce-fp-as-gp) ++Force performing fp-as-gp optimization. ++ ++mno-fp-as-gp ++Target RejectNegative Alias(mforbid-fp-as-gp) ++Forbid performing fp-as-gp optimization. + + ; --------------------------------------------------------------- + +@@ -85,11 +92,36 @@ + Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) + Generate code in little-endian mode. + ++mforce-fp-as-gp ++Target Undocumented Mask(FORCE_FP_AS_GP) ++Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. ++ ++mforbid-fp-as-gp ++Target Undocumented Mask(FORBID_FP_AS_GP) ++Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. ++ ++mict-model= ++Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL) ++Specify the address generation strategy for ICT call's code model. ++ ++Enum ++Name(nds32_ict_model_type) Type(enum nds32_ict_model_type) ++Known cmodel types (for use with the -mict-model= option): ++ ++EnumValue ++Enum(nds32_ict_model_type) String(small) Value(ICT_MODEL_SMALL) ++ ++EnumValue ++Enum(nds32_ict_model_type) String(large) Value(ICT_MODEL_LARGE) + + mcmov + Target Report Mask(CMOV) + Generate conditional move instructions. + ++mhw-abs ++Target Report Mask(HW_ABS) ++Generate hardware abs instructions. ++ + mext-perf + Target Report Mask(EXT_PERF) + Generate performance extension instructions. +@@ -102,6 +134,10 @@ + Target Report Mask(EXT_STRING) + Generate string extension instructions. + ++mext-dsp ++Target Report Mask(EXT_DSP) ++Generate DSP extension instructions. ++ + mv3push + Target Report Mask(V3PUSH) + Generate v3 push25/pop25 instructions. +@@ -115,13 +151,17 @@ + Insert relax hint for linker to do relaxation. + + mvh +-Target Report Mask(VH) ++Target Report Mask(VH) Condition(!TARGET_LINUX_ABI) + Enable Virtual Hosting support. + + misr-vector-size= + Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) + Specify the size of each interrupt vector, which must be 4 or 16. + ++misr-secure= ++Target RejectNegative Joined UInteger Var(nds32_isr_secure_level) Init(0) ++Specify the security level of c-isr for the whole file. ++ + mcache-block-size= + Target RejectNegative Joined UInteger Var(nds32_cache_block_size) Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE) + Specify the size of each cache block, which must be a power of 2 between 4 and 512. +@@ -141,6 +181,9 @@ + Enum(nds32_arch_type) String(v3) Value(ARCH_V3) + + EnumValue ++Enum(nds32_arch_type) String(v3j) Value(ARCH_V3J) ++ ++EnumValue + Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M) + + EnumValue +@@ -149,23 +192,6 @@ + EnumValue + Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S) + +-mcmodel= +-Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) +-Specify the address generation strategy for code model. +- +-Enum +-Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) +-Known cmodel types (for use with the -mcmodel= option): +- +-EnumValue +-Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) +- +-EnumValue +-Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) +- +-EnumValue +-Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) +- + mcpu= + Target RejectNegative Joined Enum(nds32_cpu_type) Var(nds32_cpu_option) Init(CPU_N9) + Specify the cpu for pipeline model. +@@ -235,6 +261,99 @@ + Enum(nds32_cpu_type) String(n968a) Value(CPU_N9) + + EnumValue ++Enum(nds32_cpu_type) String(n10) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d10) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d1088) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10) ++ ++EnumValue ++Enum(nds32_cpu_type) Undocumented String(graywolf) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n15) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d15) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n15s) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d15s) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n15f) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(d15f) Value(CPU_GRAYWOLF) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n12) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1213) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1233) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n13) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1337) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13) ++ ++EnumValue ++Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13) ++ ++EnumValue + Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE) + + mconfig-fpu= +@@ -321,6 +440,18 @@ + Target Report Mask(FPU_DOUBLE) + Generate double-precision floating-point instructions. + ++mforce-no-ext-dsp ++Target Undocumented Report Mask(FORCE_NO_EXT_DSP) ++Force disable hardware loop, even use -mext-dsp. ++ ++msched-prolog-epilog ++Target Var(flag_sched_prolog_epilog) Init(0) ++Permit scheduling of a function's prologue and epilogue sequence. ++ ++mret-in-naked-func ++Target Var(flag_ret_in_naked_func) Init(1) ++Generate return instruction in naked function. ++ + malways-save-lp + Target Var(flag_always_save_lp) Init(0) + Always save $lp in the stack. +@@ -328,3 +459,7 @@ + munaligned-access + Target Report Var(flag_unaligned_access) Init(0) + Enable unaligned word and halfword accesses to packed data. ++ ++minline-asm-r15 ++Target Report Var(flag_inline_asm_r15) Init(0) ++Allow use r15 for inline ASM. +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-opts.h gcc-8.2.0/gcc/config/nds32/nds32-opts.h +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-opts.h 2018-04-08 11:21:30.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-opts.h 2019-01-25 15:38:32.833242671 +0100 +@@ -29,6 +29,7 @@ + { + ARCH_V2, + ARCH_V3, ++ ARCH_V3J, + ARCH_V3M, + ARCH_V3F, + ARCH_V3S +@@ -42,6 +43,10 @@ + CPU_N8, + CPU_E8, + CPU_N9, ++ CPU_N10, ++ CPU_GRAYWOLF, ++ CPU_N12, ++ CPU_N13, + CPU_SIMPLE + }; + +@@ -53,6 +58,13 @@ + CMODEL_LARGE + }; + ++/* The code model defines the address generation strategy. */ ++enum nds32_ict_model_type ++{ ++ ICT_MODEL_SMALL, ++ ICT_MODEL_LARGE ++}; ++ + /* Multiply instruction configuration. */ + enum nds32_mul_type + { +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-peephole2.md gcc-8.2.0/gcc/config/nds32/nds32-peephole2.md +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-peephole2.md 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/nds32-peephole2.md 2019-01-25 15:38:32.833242671 +0100 +@@ -22,3 +22,139 @@ + ;; Use define_peephole2 to handle possible target-specific optimization. + + ;; ------------------------------------------------------------------------ ++;; Try to utilize 16-bit instruction by swap operand if possible. ++;; ------------------------------------------------------------------------ ++ ++;; Try to make add as add45. ++(define_peephole2 ++ [(set (match_operand:QIHISI 0 "register_operand" "") ++ (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "") ++ (match_operand:QIHISI 2 "register_operand" "")))] ++ "reload_completed ++ && TARGET_16_BIT ++ && REGNO (operands[0]) == REGNO (operands[2]) ++ && REGNO (operands[0]) != REGNO (operands[1]) ++ && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))" ++ [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))]) ++ ++;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33. ++(define_peephole2 ++ [(set (match_operand:SI 0 "register_operand" "") ++ (match_operator:SI 1 "nds32_have_33_inst_operator" ++ [(match_operand:SI 2 "register_operand" "") ++ (match_operand:SI 3 "register_operand" "")]))] ++ "reload_completed ++ && TARGET_16_BIT ++ && REGNO (operands[0]) == REGNO (operands[3]) ++ && REGNO (operands[0]) != REGNO (operands[2]) ++ && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0])) ++ && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))" ++ [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))]) ++ ++(define_peephole ++ [(set (match_operand:SI 0 "register_operand" "") ++ (match_operand:SI 1 "register_operand" "")) ++ (set (match_operand:SI 2 "register_operand" "") ++ (match_operand:SI 3 "register_operand" ""))] ++ "TARGET_16_BIT ++ && !TARGET_ISA_V2 ++ && NDS32_IS_GPR_REGNUM (REGNO (operands[0])) ++ && NDS32_IS_GPR_REGNUM (REGNO (operands[1])) ++ && ((REGNO (operands[0]) & 0x1) == 0) ++ && ((REGNO (operands[1]) & 0x1) == 0) ++ && (REGNO (operands[0]) + 1) == REGNO (operands[2]) ++ && (REGNO (operands[1]) + 1) == REGNO (operands[3])" ++ "movd44\t%0, %1" ++ [(set_attr "type" "alu") ++ (set_attr "length" "2")]) ++ ++;; Merge two fcpyss to fcpysd. ++(define_peephole2 ++ [(set (match_operand:SF 0 "float_even_register_operand" "") ++ (match_operand:SF 1 "float_even_register_operand" "")) ++ (set (match_operand:SF 2 "float_odd_register_operand" "") ++ (match_operand:SF 3 "float_odd_register_operand" ""))] ++ "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ && REGNO (operands[0]) == REGNO (operands[2]) - 1 ++ && REGNO (operands[1]) == REGNO (operands[3]) - 1" ++ [(set (match_dup 4) (match_dup 5))] ++ { ++ operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0])); ++ operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1])); ++ }) ++ ++(define_peephole2 ++ [(set (match_operand:SF 0 "float_odd_register_operand" "") ++ (match_operand:SF 1 "float_odd_register_operand" "")) ++ (set (match_operand:SF 2 "float_even_register_operand" "") ++ (match_operand:SF 3 "float_even_register_operand" ""))] ++ "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) ++ && REGNO (operands[2]) == REGNO (operands[0]) - 1 ++ && REGNO (operands[3]) == REGNO (operands[1]) - 1" ++ [(set (match_dup 4) (match_dup 5))] ++ { ++ operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2])); ++ operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3])); ++ }) ++ ++;; ------------------------------------------------------------------------ ++;; GCC will prefer [u]divmodsi3 rather than [u]divsi3 even remainder is ++;; unused, so we use split to drop mod operation for lower register pressure. ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (div:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (mod:SI (match_dup 1) (match_dup 2)))] ++ "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL ++ && can_create_pseudo_p ()" ++ [(set (match_dup 0) ++ (div:SI (match_dup 1) ++ (match_dup 2)))]) ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (udiv:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "register_operand"))) ++ (set (match_operand:SI 3 "register_operand") ++ (umod:SI (match_dup 1) (match_dup 2)))] ++ "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL ++ && can_create_pseudo_p ()" ++ [(set (match_dup 0) ++ (udiv:SI (match_dup 1) ++ (match_dup 2)))]) ++ ++(define_peephole2 ++ [(set (match_operand:DI 0 "register_operand") ++ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) ++ (sign_extend:DI (match_operand:SI 2 "register_operand"))))] ++ "NDS32_EXT_DSP_P () ++ && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? REGNO (operands[0]) + 1 : REGNO (operands[0]))" ++ [(const_int 1)] ++{ ++ rtx highpart = nds32_di_high_part_subreg (operands[0]); ++ emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_split ++ [(set (match_operand:DI 0 "nds32_general_register_operand" "") ++ (match_operand:DI 1 "nds32_general_register_operand" ""))] ++ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL ++ || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL" ++ [(set (match_dup 0) (match_dup 1))] ++{ ++ rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); ++ HOST_WIDE_INT offset; ++ if (dead_note == NULL_RTX) ++ offset = 0; ++ else ++ offset = 4; ++ operands[0] = simplify_gen_subreg ( ++ SImode, operands[0], ++ DImode, offset); ++ operands[1] = simplify_gen_subreg ( ++ SImode, operands[1], ++ DImode, offset); ++}) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-pipelines-auxiliary.c gcc-8.2.0/gcc/config/nds32/nds32-pipelines-auxiliary.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-pipelines-auxiliary.c 2018-04-08 11:21:30.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-pipelines-auxiliary.c 2019-01-25 15:38:32.833242671 +0100 +@@ -306,6 +306,19 @@ + return false; + } + ++/* Determine if the latency is occured when the consumer PBSADA_INSN uses the ++ value of DEF_REG in its Rt field. */ ++bool ++pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg) ++{ ++ rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn)); ++ ++ if (rtx_equal_p (def_reg, pbsada_rt)) ++ return true; ++ ++ return false; ++} ++ + /* Check if INSN is a movd44 insn consuming DEF_REG. */ + bool + movd44_even_dep_p (rtx_insn *insn, rtx def_reg) +@@ -335,6 +348,103 @@ + return false; + } + ++/* Check if INSN is a wext insn consuming DEF_REG. */ ++bool ++wext_odd_dep_p (rtx insn, rtx def_reg) ++{ ++ rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0); ++ rtx use_reg = XEXP (shift_rtx, 0); ++ rtx pos_rtx = XEXP (shift_rtx, 1); ++ ++ if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx)) ++ return true; ++ ++ if (GET_MODE (def_reg) == DImode) ++ return reg_overlap_p (def_reg, use_reg); ++ ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ gcc_assert (REG_P (use_reg)); ++ ++ if (REG_P (def_reg)) ++ { ++ if (!TARGET_BIG_ENDIAN) ++ return REGNO (def_reg) == REGNO (use_reg) + 1; ++ else ++ return REGNO (def_reg) == REGNO (use_reg); ++ } ++ ++ if (GET_CODE (def_reg) == SUBREG) ++ { ++ if (!reg_overlap_p (def_reg, use_reg)) ++ return false; ++ ++ if (!TARGET_BIG_ENDIAN) ++ return SUBREG_BYTE (def_reg) == 4; ++ else ++ return SUBREG_BYTE (def_reg) == 0; ++ } ++ ++ return false; ++} ++ ++/* Check if INSN is a bpick insn consuming DEF_REG. */ ++bool ++bpick_ra_rb_dep_p (rtx insn, rtx def_reg) ++{ ++ rtx ior_rtx = SET_SRC (PATTERN (insn)); ++ rtx and1_rtx = XEXP (ior_rtx, 0); ++ rtx and2_rtx = XEXP (ior_rtx, 1); ++ rtx reg1_0 = XEXP (and1_rtx, 0); ++ rtx reg1_1 = XEXP (and1_rtx, 1); ++ rtx reg2_0 = XEXP (and2_rtx, 0); ++ rtx reg2_1 = XEXP (and2_rtx, 1); ++ ++ if (GET_CODE (reg1_0) == NOT) ++ { ++ if (rtx_equal_p (reg1_0, reg2_0)) ++ return reg_overlap_p (def_reg, reg1_1) ++ || reg_overlap_p (def_reg, reg2_1); ++ ++ if (rtx_equal_p (reg1_0, reg2_1)) ++ return reg_overlap_p (def_reg, reg1_1) ++ || reg_overlap_p (def_reg, reg2_0); ++ } ++ ++ if (GET_CODE (reg1_1) == NOT) ++ { ++ if (rtx_equal_p (reg1_1, reg2_0)) ++ return reg_overlap_p (def_reg, reg1_0) ++ || reg_overlap_p (def_reg, reg2_1); ++ ++ if (rtx_equal_p (reg1_1, reg2_1)) ++ return reg_overlap_p (def_reg, reg1_0) ++ || reg_overlap_p (def_reg, reg2_0); ++ } ++ ++ if (GET_CODE (reg2_0) == NOT) ++ { ++ if (rtx_equal_p (reg2_0, reg1_0)) ++ return reg_overlap_p (def_reg, reg2_1) ++ || reg_overlap_p (def_reg, reg1_1); ++ ++ if (rtx_equal_p (reg2_0, reg1_1)) ++ return reg_overlap_p (def_reg, reg2_1) ++ || reg_overlap_p (def_reg, reg1_0); ++ } ++ ++ if (GET_CODE (reg2_1) == NOT) ++ { ++ if (rtx_equal_p (reg2_1, reg1_0)) ++ return reg_overlap_p (def_reg, reg2_0) ++ || reg_overlap_p (def_reg, reg1_1); ++ ++ if (rtx_equal_p (reg2_1, reg1_1)) ++ return reg_overlap_p (def_reg, reg2_0) ++ || reg_overlap_p (def_reg, reg1_0); ++ } ++ ++ gcc_unreachable (); ++} + } // namespace scheduling + } // namespace nds32 + +@@ -375,8 +485,7 @@ + operations in order to write two registers. We have to check the + dependency from the producer to the first micro-operation. */ + case TYPE_DIV: +- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 +- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); +@@ -506,8 +615,7 @@ + operations in order to write two registers. We have to check the + dependency from the producer to the first micro-operation. */ + case TYPE_DIV: +- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 +- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); +@@ -606,8 +714,7 @@ + break; + + case TYPE_DIV: +- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 +- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); +@@ -706,13 +813,175 @@ + We have to check the dependency from the producer to the first + micro-operation. */ + case TYPE_DIV: +- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 +- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MMU: ++ if (GET_CODE (PATTERN (consumer)) == SET) ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ else ++ return true; ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ use_rtx = extract_mem_rtx (consumer); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = PATTERN (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. */ ++bool ++n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ case TYPE_PBSAD: ++ case TYPE_MUL: ++ case TYPE_DALU: ++ case TYPE_DALU64: ++ case TYPE_DMUL: ++ case TYPE_DPACK: ++ case TYPE_DINSB: ++ case TYPE_DCMP: ++ case TYPE_DCLIP: ++ case TYPE_DALUROUND: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_ALU_SHIFT: ++ use_rtx = extract_shift_reg (consumer); ++ break; ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_MAC: ++ case TYPE_DMAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. */ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_DWEXT: ++ return wext_odd_dep_p (consumer, def_reg); ++ ++ case TYPE_DBPICK: ++ return bpick_ra_rb_dep_p (consumer, def_reg); ++ ++ case TYPE_MMU: ++ if (GET_CODE (PATTERN (consumer)) == SET) ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ else ++ return true; ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ use_rtx = extract_mem_rtx (consumer); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = PATTERN (consumer); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at EX. */ ++bool ++gw_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ case TYPE_PBSAD: ++ case TYPE_MUL: ++ case TYPE_DALU: ++ case TYPE_DALU64: ++ case TYPE_DMUL: ++ case TYPE_DPACK: ++ case TYPE_DINSB: ++ case TYPE_DCMP: ++ case TYPE_DCLIP: ++ case TYPE_DALUROUND: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_ALU_SHIFT: ++ use_rtx = extract_shift_reg (consumer); ++ break; ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_MAC: ++ case TYPE_DMAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to check the ++ dependency from the producer to the first micro-operation. */ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + ++ case TYPE_DWEXT: ++ return wext_odd_dep_p (consumer, def_reg); ++ ++ case TYPE_DBPICK: ++ return bpick_ra_rb_dep_p (consumer, def_reg); ++ + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); +@@ -744,7 +1013,153 @@ + return false; + } + ++/* Check dependencies from any stages to ALU_E1 (E1). This is a helper ++ function of n13_consumed_by_e1_dep_p (). */ ++bool ++n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg) ++{ ++ rtx unspec_rtx, operand_ra, operand_rb; ++ rtx src_rtx, dst_rtx; ++ ++ switch (INSN_CODE (alu_e1_insn)) ++ { ++ /* BSP and BSE are supported by built-in functions, the corresponding ++ patterns are formed by UNSPEC RTXs. We have to handle them ++ individually. */ ++ case CODE_FOR_unspec_bsp: ++ case CODE_FOR_unspec_bse: ++ unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0)); ++ gcc_assert (GET_CODE (unspec_rtx) == UNSPEC); ++ ++ operand_ra = XVECEXP (unspec_rtx, 0, 0); ++ operand_rb = XVECEXP (unspec_rtx, 0, 1); ++ ++ if (rtx_equal_p (def_reg, operand_ra) ++ || rtx_equal_p (def_reg, operand_rb)) ++ return true; ++ ++ return false; ++ ++ /* Unlink general ALU instructions, MOVD44 requires operands at E1. */ ++ case CODE_FOR_move_di: ++ case CODE_FOR_move_df: ++ src_rtx = SET_SRC (PATTERN (alu_e1_insn)); ++ dst_rtx = SET_DEST (PATTERN (alu_e1_insn)); ++ ++ if (REG_P (dst_rtx) && REG_P (src_rtx) ++ && rtx_equal_p (src_rtx, def_reg)) ++ return true; ++ ++ return false; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at E1. Because the address generation unti is ++ at E1, the address input should be ready at E1. Note that the branch ++ target is also a kind of addresses, so we have to check it. */ ++bool ++n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ /* ALU_E1 */ ++ case TYPE_ALU: ++ return n13_alu_e1_insn_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_PBSAD: ++ case TYPE_MUL: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MAC: ++ use_rtx = extract_mac_non_acc_rtx (consumer); ++ break; ++ ++ case TYPE_DIV: ++ if (divmod_p (consumer)) ++ use_rtx = SET_SRC (parallel_element (consumer, 0)); ++ else ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_MMU: ++ if (GET_CODE (PATTERN (consumer)) == SET) ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ else ++ return true; ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = extract_branch_target_rtx (consumer); ++ break; ++ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ use_rtx = extract_mem_rtx (consumer); ++ break; ++ ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_base_reg (consumer); ++ break; ++ ++ default: ++ return false; ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; ++ ++ return false; ++} ++ ++/* Check the dependency between the producer defining DEF_REG and CONSUMER ++ requiring input operand at E2. */ ++bool ++n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg) ++{ ++ rtx use_rtx; ++ ++ switch (get_attr_type (consumer)) ++ { ++ case TYPE_ALU: ++ case TYPE_STORE: ++ use_rtx = SET_SRC (PATTERN (consumer)); ++ break; ++ ++ case TYPE_ALU_SHIFT: ++ use_rtx = extract_shift_reg (consumer); ++ break; ++ ++ case TYPE_PBSADA: ++ return pbsada_insn_rt_dep_reg_p (consumer, def_reg); ++ ++ case TYPE_STORE_MULTIPLE: ++ use_rtx = extract_nth_access_rtx (consumer, 0); ++ break; ++ ++ case TYPE_BRANCH: ++ use_rtx = extract_branch_condition_rtx (consumer); ++ break; ++ ++ default: ++ gcc_unreachable(); ++ } ++ ++ if (reg_overlap_p (def_reg, use_rtx)) ++ return true; + ++ return false; ++} + } // anonymous namespace + + /* ------------------------------------------------------------------------ */ +@@ -837,8 +1252,7 @@ + break; + + case TYPE_DIV: +- if (INSN_CODE (producer) == CODE_FOR_divmodsi4 +- || INSN_CODE (producer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (producer)) + def_reg = SET_DEST (parallel_element (producer, 1)); + else + def_reg = SET_DEST (PATTERN (producer)); +@@ -969,8 +1383,7 @@ + break; + + case TYPE_DIV: +- if (INSN_CODE (producer) == CODE_FOR_divmodsi4 +- || INSN_CODE (producer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); +@@ -1073,8 +1486,7 @@ + results, the quotient and the remainder. We have to handle them + individually. */ + case TYPE_DIV: +- if (INSN_CODE (producer) == CODE_FOR_divmodsi4 +- || INSN_CODE (producer) == CODE_FOR_udivmodsi4) ++ if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); +@@ -1132,4 +1544,245 @@ + return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg); + } + ++/* Guard functions for N10 cores. */ ++ ++/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ ++bool ++nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ gcc_assert (get_attr_type (producer) == TYPE_FLOAD ++ || get_attr_type (producer) == TYPE_FSTORE); ++ gcc_assert (get_attr_type (consumer) == TYPE_FLOAD ++ || get_attr_type (consumer) == TYPE_FSTORE); ++ ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ return reg_overlap_p (extract_base_reg (producer), ++ extract_mem_rtx (consumer)); ++} ++ ++/* Check dependencies from MM to EX. */ ++bool ++nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ case TYPE_DALU64: ++ case TYPE_DMUL: ++ case TYPE_DMAC: ++ case TYPE_DALUROUND: ++ case TYPE_DBPICK: ++ case TYPE_DWEXT: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. */ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (n10_consumed_by_ex_dep_p (consumer, def_reg1) ++ || n10_consumed_by_ex_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n10_consumed_by_ex_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to EX. */ ++bool ++nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return n10_consumed_by_ex_dep_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for Graywolf cores. */ ++ ++/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ ++bool ++nds32_gw_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ return nds32_n10_ex_to_ex_p (producer, consumer); ++} ++ ++/* Check dependencies from MM to EX. */ ++bool ++nds32_gw_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ case TYPE_LOAD: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ case TYPE_DALU64: ++ case TYPE_DMUL: ++ case TYPE_DMAC: ++ case TYPE_DALUROUND: ++ case TYPE_DBPICK: ++ case TYPE_DWEXT: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. */ ++ case TYPE_DIV: ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (gw_consumed_by_ex_dep_p (consumer, def_reg1) ++ || gw_consumed_by_ex_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return gw_consumed_by_ex_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to EX. */ ++bool ++nds32_gw_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return gw_consumed_by_ex_dep_p (consumer, last_def_reg); ++} ++ ++/* Guard functions for N12/N13 cores. */ ++ ++/* Check dependencies from E2 to E1. */ ++bool ++nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg; ++ ++ switch (get_attr_type (producer)) ++ { ++ /* Only post-update load/store instructions are considered. These ++ instructions produces address output at E2. */ ++ case TYPE_LOAD: ++ case TYPE_STORE: ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ if (!post_update_insn_p (producer)) ++ return false; ++ ++ def_reg = extract_base_reg (producer); ++ break; ++ ++ case TYPE_ALU: ++ case TYPE_ALU_SHIFT: ++ case TYPE_PBSAD: ++ case TYPE_PBSADA: ++ case TYPE_MUL: ++ case TYPE_MAC: ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ case TYPE_BRANCH: ++ return true; ++ ++ case TYPE_DIV: ++ /* Some special instructions, divmodsi4 and udivmodsi4, produce two ++ results, the quotient and the remainder. We have to handle them ++ individually. */ ++ if (divmod_p (producer)) ++ { ++ rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); ++ rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); ++ ++ return (n13_consumed_by_e1_dep_p (consumer, def_reg1) ++ || n13_consumed_by_e1_dep_p (consumer, def_reg2)); ++ } ++ ++ def_reg = SET_DEST (PATTERN (producer)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return n13_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from Load-Store Unit (E3) to E1. */ ++bool ++nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ gcc_assert (get_attr_type (producer) == TYPE_LOAD); ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ ++ return n13_consumed_by_e1_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from Load-Store Unit (E3) to E2. */ ++bool ++nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx def_reg = SET_DEST (PATTERN (producer)); ++ ++ gcc_assert (get_attr_type (producer) == TYPE_LOAD); ++ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); ++ ++ return n13_consumed_by_e2_dep_p (consumer, def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E1. */ ++bool ++nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return n13_consumed_by_e1_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N) to E2. */ ++bool ++nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_def_reg = extract_nth_access_reg (producer, -1); ++ ++ return n13_consumed_by_e2_dep_p (consumer, last_def_reg); ++} ++ ++/* Check dependencies from LMW(N, N-1) to E2. */ ++bool ++nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) ++{ ++ rtx last_two_def_reg = extract_nth_access_reg (producer, -2); ++ ++ if (last_two_def_reg == NULL_RTX) ++ return false; ++ ++ return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg); ++} + /* ------------------------------------------------------------------------ */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-predicates.c gcc-8.2.0/gcc/config/nds32/nds32-predicates.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-predicates.c 2018-05-07 04:22:07.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-predicates.c 2019-01-25 15:41:54.217806220 +0100 +@@ -356,8 +356,8 @@ + } + + /* Function to check if 'bclr' instruction can be used with IVAL. */ +-int +-nds32_can_use_bclr_p (int ival) ++bool ++nds32_can_use_bclr_p (HOST_WIDE_INT ival) + { + int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); +@@ -373,8 +373,8 @@ + } + + /* Function to check if 'bset' instruction can be used with IVAL. */ +-int +-nds32_can_use_bset_p (int ival) ++bool ++nds32_can_use_bset_p (HOST_WIDE_INT ival) + { + int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); +@@ -389,8 +389,8 @@ + } + + /* Function to check if 'btgl' instruction can be used with IVAL. */ +-int +-nds32_can_use_btgl_p (int ival) ++bool ++nds32_can_use_btgl_p (HOST_WIDE_INT ival) + { + int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); +@@ -405,8 +405,8 @@ + } + + /* Function to check if 'bitci' instruction can be used with IVAL. */ +-int +-nds32_can_use_bitci_p (int ival) ++bool ++nds32_can_use_bitci_p (HOST_WIDE_INT ival) + { + /* If we are using V3 ISA, we have 'bitci' instruction. + Try to see if we can present 'andi' semantic with +@@ -518,4 +518,117 @@ + + return val >= lower && val < upper; + } ++ ++bool ++nds32_const_unspec_p (rtx x) ++{ ++ if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == PLUS) ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == UNSPEC) ++ { ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOTINIT: ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLT: ++ case UNSPEC_TLSGD: ++ case UNSPEC_TLSLD: ++ case UNSPEC_TLSIE: ++ case UNSPEC_TLSLE: ++ return false; ++ default: ++ return true; ++ } ++ } ++ } ++ ++ if (GET_CODE (x) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (x)) ++ return false; ++ ++ return true; ++} ++ ++HOST_WIDE_INT ++const_vector_to_hwint (rtx op) ++{ ++ HOST_WIDE_INT hwint = 0; ++ HOST_WIDE_INT mask; ++ int i; ++ int shift_adv; ++ int shift = 0; ++ int nelem; ++ ++ switch (GET_MODE (op)) ++ { ++ case E_V2HImode: ++ mask = 0xffff; ++ shift_adv = 16; ++ nelem = 2; ++ break; ++ case E_V4QImode: ++ mask = 0xff; ++ shift_adv = 8; ++ nelem = 4; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (TARGET_BIG_ENDIAN) ++ { ++ for (i = 0; i < nelem; ++i) ++ { ++ HOST_WIDE_INT val = XINT (XVECEXP (op, 0, nelem - i - 1), 0); ++ hwint |= (val & mask) << shift; ++ shift = shift + shift_adv; ++ } ++ } ++ else ++ { ++ for (i = 0; i < nelem; ++i) ++ { ++ HOST_WIDE_INT val = XINT (XVECEXP (op, 0, i), 0); ++ hwint |= (val & mask) << shift; ++ shift = shift + shift_adv; ++ } ++ } ++ ++ return hwint; ++} ++ ++bool ++nds32_valid_CVp5_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16)); ++} ++ ++bool ++nds32_valid_CVs5_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival < (1 << 4)) && (ival >= -(1 << 4)); ++} ++ ++bool ++nds32_valid_CVs2_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival < (1 << 19)) && (ival >= -(1 << 19)); ++} ++ ++bool ++nds32_valid_CVhi_p (rtx op) ++{ ++ HOST_WIDE_INT ival = const_vector_to_hwint (op); ++ return (ival != 0) && ((ival & 0xfff) == 0); ++} ++ + /* ------------------------------------------------------------------------ */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-protos.h gcc-8.2.0/gcc/config/nds32/nds32-protos.h +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-protos.h 2018-04-22 11:05:10.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-protos.h 2019-01-25 15:38:32.833242671 +0100 +@@ -69,9 +69,10 @@ + + /* ------------------------------------------------------------------------ */ + +-/* Auxiliary functions for lwm/smw. */ ++/* Auxiliary functions for manipulation DI mode. */ + +-extern bool nds32_valid_smw_lwm_base_p (rtx); ++extern rtx nds32_di_high_part_subreg(rtx); ++extern rtx nds32_di_low_part_subreg(rtx); + + /* Auxiliary functions for expanding rtl used in nds32-multiple.md. */ + +@@ -116,6 +117,20 @@ + extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); + extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); + ++extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_gw_ex_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_gw_mm_to_ex_p (rtx_insn *, rtx_insn *); ++extern bool nds32_gw_last_load_to_ex_p (rtx_insn *, rtx_insn *); ++ ++extern bool nds32_n13_e2_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_load_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_load_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_last_load_to_e1_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_last_load_to_e2_p (rtx_insn *, rtx_insn *); ++extern bool nds32_n13_last_two_load_to_e1_p (rtx_insn *, rtx_insn *); + + /* Auxiliary functions for stack operation predicate checking. */ + +@@ -123,24 +138,25 @@ + + /* Auxiliary functions for bit operation detection. */ + +-extern int nds32_can_use_bclr_p (int); +-extern int nds32_can_use_bset_p (int); +-extern int nds32_can_use_btgl_p (int); ++extern bool nds32_can_use_bclr_p (HOST_WIDE_INT); ++extern bool nds32_can_use_bset_p (HOST_WIDE_INT); ++extern bool nds32_can_use_btgl_p (HOST_WIDE_INT); + +-extern int nds32_can_use_bitci_p (int); ++extern bool nds32_can_use_bitci_p (HOST_WIDE_INT); + + extern bool nds32_const_double_range_ok_p (rtx, machine_mode, + HOST_WIDE_INT, HOST_WIDE_INT); + ++extern bool nds32_const_unspec_p (rtx x); ++ + /* Auxiliary function for 'Computing the Length of an Insn'. */ + + extern int nds32_adjust_insn_length (rtx_insn *, int); + + /* Auxiliary functions for FP_AS_GP detection. */ + +-extern int nds32_fp_as_gp_check_available (void); +- + extern bool nds32_symbol_load_store_p (rtx_insn *); ++extern bool nds32_naked_function_p (tree); + + /* Auxiliary functions for jump table generation. */ + +@@ -159,10 +175,50 @@ + extern enum nds32_expand_result_type nds32_expand_movcc (rtx *); + extern void nds32_expand_float_movcc (rtx *); + ++/* Auxiliary functions for expand extv/insv instruction. */ ++ ++extern enum nds32_expand_result_type nds32_expand_extv (rtx *); ++extern enum nds32_expand_result_type nds32_expand_insv (rtx *); ++ ++/* Auxiliary functions for expand PIC instruction. */ ++ ++extern void nds32_expand_pic_move (rtx *); ++ ++/* Auxiliary functions to legitimize PIC address. */ ++ ++extern rtx nds32_legitimize_pic_address (rtx); ++ ++/* Auxiliary functions for expand TLS instruction. */ ++ ++extern void nds32_expand_tls_move (rtx *); ++ ++/* Auxiliary functions to legitimize TLS address. */ ++ ++extern rtx nds32_legitimize_tls_address (rtx); ++ ++/* Auxiliary functions to identify thread-local symbol. */ ++ ++extern bool nds32_tls_referenced_p (rtx); ++ ++/* Auxiliary functions for expand ICT instruction. */ ++ ++extern void nds32_expand_ict_move (rtx *); ++ ++/* Auxiliary functions to legitimize address for indirect-call symbol. */ ++ ++extern rtx nds32_legitimize_ict_address (rtx); ++ ++/* Auxiliary functions to identify indirect-call symbol. */ ++ ++extern bool nds32_indirect_call_referenced_p (rtx); + + /* Auxiliary functions to identify long-call symbol. */ + extern bool nds32_long_call_p (rtx); + ++/* Auxiliary functions to identify SYMBOL_REF and LABEL_REF pattern. */ ++ ++extern bool symbolic_reference_mentioned_p (rtx); ++ + /* Auxiliary functions to identify conditional move comparison operand. */ + + extern int nds32_cond_move_p (rtx); +@@ -185,6 +241,7 @@ + extern const char *nds32_output_float_load(rtx *); + extern const char *nds32_output_float_store(rtx *); + extern const char *nds32_output_smw_single_word (rtx *); ++extern const char *nds32_output_smw_double_word (rtx *); + extern const char *nds32_output_lmw_single_word (rtx *); + extern const char *nds32_output_double (rtx *, bool); + extern const char *nds32_output_cbranchsi4_equality_zero (rtx_insn *, rtx *); +@@ -193,9 +250,12 @@ + rtx *); + extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *); + ++extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool); ++ + extern const char *nds32_output_call (rtx, rtx *, rtx, + const char *, const char *, bool); +- ++extern const char *nds32_output_tls_desc (rtx *); ++extern const char *nds32_output_tls_ie (rtx *); + + /* Auxiliary functions to output stack push/pop instruction. */ + +@@ -203,9 +263,19 @@ + extern const char *nds32_output_stack_pop (rtx); + extern const char *nds32_output_return (void); + ++ ++/* Auxiliary functions to split/output sms pattern. */ ++extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx); ++extern const char *nds32_output_sms (rtx, rtx, rtx, rtx); ++extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx); ++ + /* Auxiliary functions to split double word RTX pattern. */ + + extern void nds32_spilt_doubleword (rtx *, bool); ++extern void nds32_split_ashiftdi3 (rtx, rtx, rtx); ++extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx); ++extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx); ++extern void nds32_split_rotatertdi3 (rtx, rtx, rtx); + + /* Auxiliary functions to split large constant RTX pattern. */ + +@@ -237,15 +307,29 @@ + extern void nds32_asm_file_start_for_isr (void); + extern void nds32_asm_file_end_for_isr (void); + extern bool nds32_isr_function_p (tree); ++extern bool nds32_isr_function_critical_p (tree); + + /* Auxiliary functions for cost calculation. */ + ++extern void nds32_init_rtx_costs (void); + extern bool nds32_rtx_costs_impl (rtx, machine_mode, int, int, int *, bool); + extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool); + + /* Auxiliary functions for pre-define marco. */ + extern void nds32_cpu_cpp_builtins(struct cpp_reader *); + ++/* Auxiliary functions for const_vector's constraints. */ ++ ++extern HOST_WIDE_INT const_vector_to_hwint (rtx); ++extern bool nds32_valid_CVp5_p (rtx); ++extern bool nds32_valid_CVs5_p (rtx); ++extern bool nds32_valid_CVs2_p (rtx); ++extern bool nds32_valid_CVhi_p (rtx); ++ ++/* Auxiliary functions for lwm/smw. */ ++ ++extern bool nds32_valid_smw_lwm_base_p (rtx); ++ + extern bool nds32_split_double_word_load_store_p (rtx *,bool); + + namespace nds32 { +@@ -258,11 +342,13 @@ + bool store_single_p (rtx_insn *); + bool load_double_p (rtx_insn *); + bool store_double_p (rtx_insn *); ++bool store_offset_reg_p (rtx_insn *); + bool post_update_insn_p (rtx_insn *); + bool immed_offset_p (rtx); + int find_post_update_rtx (rtx_insn *); + rtx extract_mem_rtx (rtx_insn *); + rtx extract_base_reg (rtx_insn *); ++rtx extract_offset_rtx (rtx_insn *); + + rtx extract_shift_reg (rtx); + +@@ -271,6 +357,8 @@ + + rtx extract_mac_non_acc_rtx (rtx_insn *); + ++bool divmod_p (rtx_insn *); ++ + rtx extract_branch_target_rtx (rtx_insn *); + rtx extract_branch_condition_rtx (rtx_insn *); + } // namespace nds32 +@@ -279,5 +367,6 @@ + + /* Functions for create nds32 specific optimization pass. */ + extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); ++extern rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *); + + /* ------------------------------------------------------------------------ */ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-relax-opt.c gcc-8.2.0/gcc/config/nds32/nds32-relax-opt.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-relax-opt.c 2018-04-01 12:07:40.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-relax-opt.c 2019-01-25 15:38:32.833242671 +0100 +@@ -52,6 +52,8 @@ + #include "cfgrtl.h" + #include "tree-pass.h" + ++using namespace nds32; ++ + /* This is used to create unique relax hint id value. + The initial value is 0. */ + static int relax_group_id = 0; +@@ -185,6 +187,121 @@ + return false; + } + ++/* Return true if x is const and the referance is ict symbol. */ ++static bool ++nds32_ict_const_p (rtx x) ++{ ++ if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ return nds32_indirect_call_referenced_p (x); ++ } ++ return FALSE; ++} ++ ++/* Group the following pattern as relax candidates: ++ ++ GOT: ++ sethi $ra, hi20(sym) ++ ori $ra, $ra, lo12(sym) ++ lw $rb, [$ra + $gp] ++ ++ GOTOFF, TLSLE: ++ sethi $ra, hi20(sym) ++ ori $ra, $ra, lo12(sym) ++ LS $rb, [$ra + $gp] ++ ++ GOTOFF, TLSLE: ++ sethi $ra, hi20(sym) ++ ori $ra, $ra, lo12(sym) ++ add $rb, $ra, $gp($tp) ++ ++ Initial GOT table: ++ sethi $gp,hi20(sym) ++ ori $gp, $gp, lo12(sym) ++ add5.pc $gp */ ++ ++static auto_vec<rtx_insn *, 32> nds32_group_infos; ++/* Group the PIC and TLS relax candidate instructions for linker. */ ++static bool ++nds32_pic_tls_group (rtx_insn *def_insn, ++ enum nds32_relax_insn_type relax_type, ++ int sym_type) ++{ ++ df_ref def_record; ++ df_link *link; ++ rtx_insn *use_insn = NULL; ++ rtx pat, new_pat; ++ def_record = DF_INSN_DEFS (def_insn); ++ for (link = DF_REF_CHAIN (def_record); link; link = link->next) ++ { ++ if (!DF_REF_INSN_INFO (link->ref)) ++ continue; ++ ++ use_insn = DF_REF_INSN (link->ref); ++ ++ /* Skip if define insn and use insn not in the same basic block. */ ++ if (!dominated_by_p (CDI_DOMINATORS, ++ BLOCK_FOR_INSN (use_insn), ++ BLOCK_FOR_INSN (def_insn))) ++ return FALSE; ++ ++ /* Skip if use_insn not active insn. */ ++ if (!active_insn_p (use_insn)) ++ return FALSE; ++ ++ switch (relax_type) ++ { ++ case RELAX_ORI: ++ ++ /* GOTOFF, TLSLE: ++ sethi $ra, hi20(sym) ++ ori $ra, $ra, lo12(sym) ++ add $rb, $ra, $gp($tp) */ ++ if ((sym_type == UNSPEC_TLSLE ++ || sym_type == UNSPEC_GOTOFF) ++ && (recog_memoized (use_insn) == CODE_FOR_addsi3)) ++ { ++ pat = XEXP (PATTERN (use_insn), 1); ++ new_pat = ++ gen_rtx_UNSPEC (SImode, ++ gen_rtvec (2, XEXP (pat, 0), XEXP (pat, 1)), ++ UNSPEC_ADD32); ++ validate_replace_rtx (pat, new_pat, use_insn); ++ nds32_group_infos.safe_push (use_insn); ++ } ++ else if (nds32_plus_reg_load_store_p (use_insn) ++ && !nds32_sp_base_or_plus_load_store_p (use_insn)) ++ nds32_group_infos.safe_push (use_insn); ++ else ++ return FALSE; ++ break; ++ ++ default: ++ return FALSE; ++ } ++ } ++ return TRUE; ++} ++ ++static int ++nds32_pic_tls_symbol_type (rtx x) ++{ ++ x = XEXP (SET_SRC (PATTERN (x)), 1); ++ ++ if (GET_CODE (x) == CONST) ++ { ++ x = XEXP (x, 0); ++ ++ if (GET_CODE (x) == PLUS) ++ x = XEXP (x, 0); ++ ++ return XINT (x, 1); ++ } ++ ++ return XINT (x, 1); ++} ++ + /* Group the relax candidates with group id. */ + static void + nds32_group_insns (rtx sethi) +@@ -193,6 +310,7 @@ + df_link *link; + rtx_insn *use_insn = NULL; + rtx group_id; ++ bool valid; + + def_record = DF_INSN_DEFS (sethi); + +@@ -242,6 +360,132 @@ + /* Insert .relax_* directive. */ + if (active_insn_p (use_insn)) + emit_insn_before (gen_relax_group (group_id), use_insn); ++ ++ /* Find ori ra, ra, unspec(symbol) instruction. */ ++ if (use_insn != NULL ++ && recog_memoized (use_insn) == CODE_FOR_lo_sum ++ && !nds32_const_unspec_p (XEXP (SET_SRC (PATTERN (use_insn)), 1))) ++ { ++ int sym_type = nds32_pic_tls_symbol_type (use_insn); ++ valid = nds32_pic_tls_group (use_insn, RELAX_ORI, sym_type); ++ ++ /* Insert .relax_* directive. */ ++ while (!nds32_group_infos.is_empty ()) ++ { ++ use_insn = nds32_group_infos.pop (); ++ if (valid) ++ emit_insn_before (gen_relax_group (group_id), use_insn); ++ } ++ } ++ } ++ ++ relax_group_id++; ++} ++ ++/* Convert relax group id in rtl. */ ++ ++static void ++nds32_group_tls_insn (rtx insn) ++{ ++ rtx pat = PATTERN (insn); ++ rtx unspec_relax_group = XEXP (XVECEXP (pat, 0, 1), 0); ++ ++ while (GET_CODE (pat) != SET && GET_CODE (pat) == PARALLEL) ++ { ++ pat = XVECEXP (pat, 0, 0); ++ } ++ ++ if (GET_CODE (unspec_relax_group) == UNSPEC ++ && XINT (unspec_relax_group, 1) == UNSPEC_VOLATILE_RELAX_GROUP) ++ { ++ XVECEXP (unspec_relax_group, 0, 0) = GEN_INT (relax_group_id); ++ } ++ ++ relax_group_id++; ++} ++ ++static bool ++nds32_float_reg_load_store_p (rtx_insn *insn) ++{ ++ rtx pat = PATTERN (insn); ++ ++ if (get_attr_type (insn) == TYPE_FLOAD ++ && GET_CODE (pat) == SET ++ && (GET_MODE (XEXP (pat, 0)) == SFmode ++ || GET_MODE (XEXP (pat, 0)) == DFmode) ++ && MEM_P (XEXP (pat, 1))) ++ { ++ rtx addr = XEXP (XEXP (pat, 1), 0); ++ ++ /* [$ra] */ ++ if (REG_P (addr)) ++ return true; ++ /* [$ra + offset] */ ++ if (GET_CODE (addr) == PLUS ++ && REG_P (XEXP (addr, 0)) ++ && CONST_INT_P (XEXP (addr, 1))) ++ return true; ++ } ++ return false; ++} ++ ++ ++/* Group float load-store instructions: ++ la $ra, symbol ++ flsi $rt, [$ra + offset] */ ++ ++static void ++nds32_group_float_insns (rtx insn) ++{ ++ df_ref def_record, use_record; ++ df_link *link; ++ rtx_insn *use_insn = NULL; ++ rtx group_id; ++ ++ def_record = DF_INSN_DEFS (insn); ++ ++ for (link = DF_REF_CHAIN (def_record); link; link = link->next) ++ { ++ if (!DF_REF_INSN_INFO (link->ref)) ++ continue; ++ ++ use_insn = DF_REF_INSN (link->ref); ++ ++ /* Skip if define insn and use insn not in the same basic block. */ ++ if (!dominated_by_p (CDI_DOMINATORS, ++ BLOCK_FOR_INSN (use_insn), ++ BLOCK_FOR_INSN (insn))) ++ return; ++ ++ /* Skip if the low-part used register is from different high-part ++ instructions. */ ++ use_record = DF_INSN_USES (use_insn); ++ if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next) ++ return; ++ ++ /* Skip if use_insn not active insn. */ ++ if (!active_insn_p (use_insn)) ++ return; ++ ++ if (!nds32_float_reg_load_store_p (use_insn) ++ || find_post_update_rtx (use_insn) != -1) ++ return; ++ } ++ ++ group_id = GEN_INT (relax_group_id); ++ /* Insert .relax_* directive for insn. */ ++ emit_insn_before (gen_relax_group (group_id), insn); ++ ++ /* Scan the use insns and insert the directive. */ ++ for (link = DF_REF_CHAIN (def_record); link; link = link->next) ++ { ++ if (!DF_REF_INSN_INFO (link->ref)) ++ continue; ++ ++ use_insn = DF_REF_INSN (link->ref); ++ ++ /* Insert .relax_* directive. */ ++ emit_insn_before (gen_relax_group (group_id), use_insn); + } + + relax_group_id++; +@@ -271,8 +515,21 @@ + /* Find sethi ra, symbol instruction. */ + if (recog_memoized (insn) == CODE_FOR_sethi + && nds32_symbolic_operand (XEXP (SET_SRC (PATTERN (insn)), 0), +- SImode)) ++ SImode) ++ && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))) + nds32_group_insns (insn); ++ else if (recog_memoized (insn) == CODE_FOR_tls_ie) ++ nds32_group_tls_insn (insn); ++ else if (TARGET_FPU_SINGLE ++ && recog_memoized (insn) == CODE_FOR_move_addr ++ && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))) ++ { ++ nds32_group_float_insns (insn); ++ } ++ } ++ else if (CALL_P (insn) && recog_memoized (insn) == CODE_FOR_tls_desc) ++ { ++ nds32_group_tls_insn (insn); + } + } + +diff -urN gcc-8.2.0.orig/gcc/config/nds32/nds32-utils.c gcc-8.2.0/gcc/config/nds32/nds32-utils.c +--- gcc-8.2.0.orig/gcc/config/nds32/nds32-utils.c 2018-04-08 10:31:52.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/nds32-utils.c 2019-01-25 15:38:32.833242671 +0100 +@@ -142,6 +142,23 @@ + return true; + } + ++bool ++store_offset_reg_p (rtx_insn *insn) ++{ ++ if (get_attr_type (insn) != TYPE_STORE) ++ return false; ++ ++ rtx offset_rtx = extract_offset_rtx (insn); ++ ++ if (offset_rtx == NULL_RTX) ++ return false; ++ ++ if (REG_P (offset_rtx)) ++ return true; ++ ++ return false; ++} ++ + /* Determine if INSN is a post update insn. */ + bool + post_update_insn_p (rtx_insn *insn) +@@ -316,22 +333,114 @@ + if (REG_P (XEXP (mem_rtx, 0))) + return XEXP (mem_rtx, 0); + ++ /* (mem (lo_sum (reg) (symbol_ref)) */ ++ if (GET_CODE (XEXP (mem_rtx, 0)) == LO_SUM) ++ return XEXP (XEXP (mem_rtx, 0), 0); ++ + plus_rtx = XEXP (mem_rtx, 0); + + if (GET_CODE (plus_rtx) == SYMBOL_REF + || GET_CODE (plus_rtx) == CONST) + return NULL_RTX; + +- gcc_assert (GET_CODE (plus_rtx) == PLUS +- || GET_CODE (plus_rtx) == POST_INC +- || GET_CODE (plus_rtx) == POST_DEC +- || GET_CODE (plus_rtx) == POST_MODIFY); +- gcc_assert (REG_P (XEXP (plus_rtx, 0))); + /* (mem (plus (reg) (const_int))) or ++ (mem (plus (mult (reg) (const_int 4)) (reg))) or + (mem (post_inc (reg))) or + (mem (post_dec (reg))) or + (mem (post_modify (reg) (plus (reg) (reg)))) */ +- return XEXP (plus_rtx, 0); ++ gcc_assert (GET_CODE (plus_rtx) == PLUS ++ || GET_CODE (plus_rtx) == POST_INC ++ || GET_CODE (plus_rtx) == POST_DEC ++ || GET_CODE (plus_rtx) == POST_MODIFY); ++ ++ if (REG_P (XEXP (plus_rtx, 0))) ++ return XEXP (plus_rtx, 0); ++ ++ gcc_assert (REG_P (XEXP (plus_rtx, 1))); ++ return XEXP (plus_rtx, 1); ++} ++ ++/* Extract the offset rtx from load/store insns. The function returns ++ NULL_RTX if offset is absent. */ ++rtx ++extract_offset_rtx (rtx_insn *insn) ++{ ++ rtx mem_rtx; ++ rtx plus_rtx; ++ rtx offset_rtx; ++ ++ /* Find the MEM rtx. The multiple load/store insns doens't have ++ the offset field so we can return NULL_RTX here. */ ++ switch (get_attr_type (insn)) ++ { ++ case TYPE_LOAD_MULTIPLE: ++ case TYPE_STORE_MULTIPLE: ++ return NULL_RTX; ++ ++ case TYPE_LOAD: ++ case TYPE_FLOAD: ++ case TYPE_STORE: ++ case TYPE_FSTORE: ++ mem_rtx = extract_mem_rtx (insn); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (MEM_P (mem_rtx)); ++ ++ /* (mem (reg)) */ ++ if (REG_P (XEXP (mem_rtx, 0))) ++ return NULL_RTX; ++ ++ plus_rtx = XEXP (mem_rtx, 0); ++ ++ switch (GET_CODE (plus_rtx)) ++ { ++ case SYMBOL_REF: ++ case CONST: ++ case POST_INC: ++ case POST_DEC: ++ return NULL_RTX; ++ ++ case PLUS: ++ /* (mem (plus (reg) (const_int))) or ++ (mem (plus (mult (reg) (const_int 4)) (reg))) */ ++ if (REG_P (XEXP (plus_rtx, 0))) ++ offset_rtx = XEXP (plus_rtx, 1); ++ else ++ { ++ gcc_assert (REG_P (XEXP (plus_rtx, 1))); ++ offset_rtx = XEXP (plus_rtx, 0); ++ } ++ ++ if (ARITHMETIC_P (offset_rtx)) ++ { ++ gcc_assert (GET_CODE (offset_rtx) == MULT); ++ gcc_assert (REG_P (XEXP (offset_rtx, 0))); ++ offset_rtx = XEXP (offset_rtx, 0); ++ } ++ break; ++ ++ case LO_SUM: ++ /* (mem (lo_sum (reg) (symbol_ref)) */ ++ offset_rtx = XEXP (plus_rtx, 1); ++ break; ++ ++ case POST_MODIFY: ++ /* (mem (post_modify (reg) (plus (reg) (reg / const_int)))) */ ++ gcc_assert (REG_P (XEXP (plus_rtx, 0))); ++ plus_rtx = XEXP (plus_rtx, 1); ++ gcc_assert (GET_CODE (plus_rtx) == PLUS); ++ offset_rtx = XEXP (plus_rtx, 0); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return offset_rtx; + } + + /* Extract the register of the shift operand from an ALU_SHIFT rtx. */ +@@ -413,6 +522,7 @@ + switch (get_attr_type (insn)) + { + case TYPE_MAC: ++ case TYPE_DMAC: + if (REG_P (XEXP (exp, 0))) + return XEXP (exp, 1); + else +@@ -423,6 +533,19 @@ + } + } + ++/* Check if the DIV insn needs two write ports. */ ++bool ++divmod_p (rtx_insn *insn) ++{ ++ gcc_assert (get_attr_type (insn) == TYPE_DIV); ++ ++ if (INSN_CODE (insn) == CODE_FOR_divmodsi4 ++ || INSN_CODE (insn) == CODE_FOR_udivmodsi4) ++ return true; ++ ++ return false; ++} ++ + /* Extract the rtx representing the branch target to help recognize + data hazards. */ + rtx +diff -urN gcc-8.2.0.orig/gcc/config/nds32/pipelines.md gcc-8.2.0/gcc/config/nds32/pipelines.md +--- gcc-8.2.0.orig/gcc/config/nds32/pipelines.md 2018-04-08 11:21:30.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/pipelines.md 2019-01-25 15:38:32.833242671 +0100 +@@ -44,6 +44,24 @@ + + + ;; ------------------------------------------------------------------------ ++;; Include N10 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-n10.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include Graywolf pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-graywolf.md") ++ ++ ++;; ------------------------------------------------------------------------ ++;; Include N12/N13 pipeline settings. ++;; ------------------------------------------------------------------------ ++(include "nds32-n13.md") ++ ++ ++;; ------------------------------------------------------------------------ + ;; Define simple pipeline settings. + ;; ------------------------------------------------------------------------ + +diff -urN gcc-8.2.0.orig/gcc/config/nds32/predicates.md gcc-8.2.0/gcc/config/nds32/predicates.md +--- gcc-8.2.0.orig/gcc/config/nds32/predicates.md 2018-04-06 07:51:33.000000000 +0200 ++++ gcc-8.2.0/gcc/config/nds32/predicates.md 2019-01-25 15:38:32.833242671 +0100 +@@ -40,7 +40,15 @@ + (match_code "mult,and,ior,xor")) + + (define_predicate "nds32_symbolic_operand" +- (match_code "const,symbol_ref,label_ref")) ++ (and (match_code "const,symbol_ref,label_ref") ++ (match_test "!(TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (op))"))) ++ ++(define_predicate "nds32_nonunspec_symbolic_operand" ++ (and (match_code "const,symbol_ref,label_ref") ++ (match_test "!flag_pic && nds32_const_unspec_p (op) ++ && !(TARGET_ICT_MODEL_LARGE ++ && nds32_indirect_call_referenced_p (op))"))) + + (define_predicate "nds32_reg_constant_operand" + (ior (match_operand 0 "register_operand") +@@ -56,14 +64,51 @@ + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Is11 (op)")))) + ++(define_predicate "nds32_imm_0_1_operand" ++ (and (match_operand 0 "const_int_operand") ++ (ior (match_test "satisfies_constraint_Iv00 (op)") ++ (match_test "satisfies_constraint_Iv01 (op)")))) ++ ++(define_predicate "nds32_imm_1_2_operand" ++ (and (match_operand 0 "const_int_operand") ++ (ior (match_test "satisfies_constraint_Iv01 (op)") ++ (match_test "satisfies_constraint_Iv02 (op)")))) ++ ++(define_predicate "nds32_imm_1_2_4_8_operand" ++ (and (match_operand 0 "const_int_operand") ++ (ior (ior (match_test "satisfies_constraint_Iv01 (op)") ++ (match_test "satisfies_constraint_Iv02 (op)")) ++ (ior (match_test "satisfies_constraint_Iv04 (op)") ++ (match_test "satisfies_constraint_Iv08 (op)"))))) ++ ++(define_predicate "nds32_imm2u_operand" ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Iu02 (op)"))) ++ ++(define_predicate "nds32_imm4u_operand" ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Iu04 (op)"))) ++ + (define_predicate "nds32_imm5u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu05 (op)"))) + ++(define_predicate "nds32_imm6u_operand" ++ (and (match_operand 0 "const_int_operand") ++ (match_test "satisfies_constraint_Iu06 (op)"))) ++ ++(define_predicate "nds32_rimm4u_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "nds32_imm4u_operand"))) ++ + (define_predicate "nds32_rimm5u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm5u_operand"))) + ++(define_predicate "nds32_rimm6u_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "nds32_imm6u_operand"))) ++ + (define_predicate "nds32_move_operand" + (and (match_operand 0 "general_operand") + (not (match_code "high,const,symbol_ref,label_ref"))) +@@ -78,6 +123,20 @@ + return true; + }) + ++(define_predicate "nds32_vmove_operand" ++ (and (match_operand 0 "general_operand") ++ (not (match_code "high,const,symbol_ref,label_ref"))) ++{ ++ /* If the constant op does NOT satisfy Is20 nor Ihig, ++ we can not perform move behavior by a single instruction. */ ++ if (GET_CODE (op) == CONST_VECTOR ++ && !satisfies_constraint_CVs2 (op) ++ && !satisfies_constraint_CVhi (op)) ++ return false; ++ ++ return true; ++}) ++ + (define_predicate "nds32_and_operand" + (match_operand 0 "nds32_reg_constant_operand") + { +@@ -127,6 +186,15 @@ + (ior (match_operand 0 "nds32_symbolic_operand") + (match_operand 0 "nds32_general_register_operand"))) + ++(define_predicate "nds32_insv_operand" ++ (match_code "const_int") ++{ ++ return INTVAL (op) == 0 ++ || INTVAL (op) == 8 ++ || INTVAL (op) == 16 ++ || INTVAL (op) == 24; ++}) ++ + (define_predicate "nds32_lmw_smw_base_operand" + (and (match_code "mem") + (match_test "nds32_valid_smw_lwm_base_p (op)"))) +diff -urN gcc-8.2.0.orig/gcc/config/nds32/t-elf gcc-8.2.0/gcc/config/nds32/t-elf +--- gcc-8.2.0.orig/gcc/config/nds32/t-elf 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/t-elf 2019-01-25 15:38:32.833242671 +0100 +@@ -0,0 +1,42 @@ ++# The multilib settings of Andes NDS32 cpu for GNU compiler ++# Copyright (C) 2012-2018 Free Software Foundation, Inc. ++# Contributed by Andes Technology Corporation. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published ++# by the Free Software Foundation; either version 3, or (at your ++# option) any later version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the ++# driver program which options are defaults for this target and thus ++# do not need to be handled specially. ++MULTILIB_OPTIONS += mcmodel=small/mcmodel=medium/mcmodel=large mvh ++ ++ifneq ($(filter graywolf,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += mcpu=graywolf ++endif ++ ++ifneq ($(filter dsp,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += mext-dsp ++endif ++ ++ifneq ($(filter zol,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += mext-zol ++endif ++ ++ifneq ($(filter v3m+,$(TM_MULTILIB_CONFIG)),) ++MULTILIB_OPTIONS += march=v3m+ ++endif ++ ++# ------------------------------------------------------------------------ +diff -urN gcc-8.2.0.orig/gcc/config/nds32/t-linux gcc-8.2.0/gcc/config/nds32/t-linux +--- gcc-8.2.0.orig/gcc/config/nds32/t-linux 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/config/nds32/t-linux 2019-01-25 15:38:32.833242671 +0100 +@@ -0,0 +1,26 @@ ++# The multilib settings of Andes NDS32 cpu for GNU compiler ++# Copyright (C) 2012-2018 Free Software Foundation, Inc. ++# Contributed by Andes Technology Corporation. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published ++# by the Free Software Foundation; either version 3, or (at your ++# option) any later version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the ++# driver program which options are defaults for this target and thus ++# do not need to be handled specially. ++MULTILIB_OPTIONS += ++ ++# ------------------------------------------------------------------------ +diff -urN gcc-8.2.0.orig/gcc/config.gcc gcc-8.2.0/gcc/config.gcc +--- gcc-8.2.0.orig/gcc/config.gcc 2018-06-25 21:34:01.000000000 +0200 ++++ gcc-8.2.0/gcc/config.gcc 2019-01-25 15:38:32.821242637 +0100 +@@ -445,7 +445,17 @@ + ;; + nds32*) + cpu_type=nds32 +- extra_headers="nds32_intrinsic.h" ++ extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc" ++ case ${target} in ++ nds32*-*-linux*) ++ extra_options="${extra_options} nds32/nds32-linux.opt" ++ ;; ++ nds32*-*-elf*) ++ extra_options="${extra_options} nds32/nds32-elf.opt" ++ ;; ++ *) ++ ;; ++ esac + extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o nds32-relax-opt.o nds32-utils.o" + ;; + nios2-*-*) +@@ -2335,17 +2345,36 @@ + tmake_file="${tmake_file} msp430/t-msp430" + extra_gcc_objs="driver-msp430.o" + ;; +-nds32le-*-*) ++nds32*-*-*) + target_cpu_default="0" + tm_defines="${tm_defines}" +- tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" +- tmake_file="nds32/t-nds32 nds32/t-mlibs" +- ;; +-nds32be-*-*) +- target_cpu_default="0|MASK_BIG_ENDIAN" +- tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" +- tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" +- tmake_file="nds32/t-nds32 nds32/t-mlibs" ++ case ${target} in ++ nds32le*-*-*) ++ ;; ++ nds32be-*-*) ++ target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN" ++ tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ++ ;; ++ esac ++ case ${target} in ++ nds32*-*-elf*) ++ tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h" ++ tmake_file="nds32/t-nds32 nds32/t-elf" ++ ;; ++ nds32*-*-linux*) ++ tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h" ++ tmake_file="${tmake_file} nds32/t-nds32 nds32/t-linux" ++ ;; ++ esac ++ ++ # Handle --enable-default-relax setting. ++ if test x${enable_default_relax} = xyes; then ++ tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1" ++ fi ++ # Handle --with-ext-dsp ++ if test x${with_ext_dsp} = xyes; then ++ tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1" ++ fi + ;; + nios2-*-*) + tm_file="elfos.h ${tm_file}" +@@ -4318,11 +4347,11 @@ + "") + with_cpu=n9 + ;; +- n6 | n7 | n8 | e8 | s8 | n9) ++ n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10 | n12 | n13 | n15) + # OK + ;; + *) +- echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2 ++ echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10 n12 n13 n15" 1>&2 + exit 1 + ;; + esac +@@ -4332,15 +4361,30 @@ + "") + # the default library is newlib + with_nds32_lib=newlib ++ tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" + ;; + newlib) + # OK ++ tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" + ;; + mculib) + # OK ++ # for the arch=v3f or arch=v3s under mculib toolchain, ++ # we would like to set -fno-math-errno as default ++ case "${with_arch}" in ++ v3f | v3s) ++ tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1" ++ ;; ++ esac ++ ;; ++ glibc) ++ # OK ++ tm_defines="${tm_defines}" ++ ;; ++ uclibc) + ;; + *) +- echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 ++ echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2 + exit 1 + ;; + esac +diff -urN gcc-8.2.0.orig/gcc/configure gcc-8.2.0/gcc/configure +--- gcc-8.2.0.orig/gcc/configure 2018-04-18 11:46:58.000000000 +0200 ++++ gcc-8.2.0/gcc/configure 2019-01-25 15:38:32.837242683 +0100 +@@ -27784,7 +27784,7 @@ + # version to the per-target configury. + case "$cpu_type" in + aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \ +- | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \ ++ | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu \ + | tilegx | tilepro | visium | xstormy16 | xtensa) + insn="nop" + ;; +diff -urN gcc-8.2.0.orig/gcc/configure.ac gcc-8.2.0/gcc/configure.ac +--- gcc-8.2.0.orig/gcc/configure.ac 2018-04-18 11:46:58.000000000 +0200 ++++ gcc-8.2.0/gcc/configure.ac 2019-01-25 15:38:32.837242683 +0100 +@@ -4910,7 +4910,7 @@ + # version to the per-target configury. + case "$cpu_type" in + aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \ +- | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \ ++ | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu \ + | tilegx | tilepro | visium | xstormy16 | xtensa) + insn="nop" + ;; +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.c-torture/execute/20010122-1.c gcc-8.2.0/gcc/testsuite/gcc.c-torture/execute/20010122-1.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.c-torture/execute/20010122-1.c 2015-12-10 20:20:14.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.c-torture/execute/20010122-1.c 2019-01-25 15:38:32.837242683 +0100 +@@ -1,4 +1,5 @@ + /* { dg-skip-if "requires frame pointers" { *-*-* } "-fomit-frame-pointer" "" } */ ++/* { dg-additional-options "-malways-save-lp" { target nds32*-*-* } } */ + /* { dg-require-effective-target return_address } */ + + extern void exit (int); +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.dg/lower-subreg-1.c gcc-8.2.0/gcc/testsuite/gcc.dg/lower-subreg-1.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.dg/lower-subreg-1.c 2017-06-17 17:32:28.000000000 +0200 ++++ gcc-8.2.0/gcc/testsuite/gcc.dg/lower-subreg-1.c 2019-01-25 15:38:32.837242683 +0100 +@@ -1,4 +1,4 @@ +-/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ ++/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* nds32*-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ + /* { dg-options "-O -fdump-rtl-subreg1" } */ + /* { dg-additional-options "-mno-stv" { target ia32 } } */ + /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } } */ +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.dg/stack-usage-1.c gcc-8.2.0/gcc/testsuite/gcc.dg/stack-usage-1.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.dg/stack-usage-1.c 2017-06-17 17:32:28.000000000 +0200 ++++ gcc-8.2.0/gcc/testsuite/gcc.dg/stack-usage-1.c 2019-01-25 15:38:32.837242683 +0100 +@@ -2,6 +2,7 @@ + /* { dg-options "-fstack-usage" } */ + /* nvptx doesn't have a reg allocator, and hence no stack usage data. */ + /* { dg-skip-if "" { nvptx-*-* } } */ ++/* { dg-options "-fstack-usage -fno-omit-frame-pointer" { target { nds32*-*-* } } } */ + + /* This is aimed at testing basic support for -fstack-usage in the back-ends. + See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c). +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-isb.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-isb.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-isb.c 2013-12-03 11:58:05.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-isb.c 1970-01-01 01:00:00.000000000 +0100 +@@ -1,11 +0,0 @@ +-/* Verify that we generate isb instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tisb" } } */ +- +-void +-test (void) +-{ +- __builtin_nds32_isb (); +-} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-isync.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-isync.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-isync.c 2013-12-03 11:58:05.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-isync.c 1970-01-01 01:00:00.000000000 +0100 +@@ -1,12 +0,0 @@ +-/* Verify that we generate isync instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tisync" } } */ +- +-void +-test (void) +-{ +- int *addr = (int *) 0x53000000; +- __builtin_nds32_isync (addr); +-} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c 2013-12-03 11:58:05.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c 1970-01-01 01:00:00.000000000 +0100 +@@ -1,17 +0,0 @@ +-/* Verify that we generate mfsr/mtsr instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tmfsr" } } */ +-/* { dg-final { scan-assembler "\\tmtsr" } } */ +- +-#include <nds32_intrinsic.h> +- +-void +-test (void) +-{ +- int ipsw_value; +- +- ipsw_value = __builtin_nds32_mfsr (__NDS32_REG_IPSW__); +- __builtin_nds32_mtsr (ipsw_value, __NDS32_REG_IPSW__); +-} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c 2013-12-03 11:58:05.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c 1970-01-01 01:00:00.000000000 +0100 +@@ -1,17 +0,0 @@ +-/* Verify that we generate mfusr/mtusr instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tmfusr" } } */ +-/* { dg-final { scan-assembler "\\tmtusr" } } */ +- +-#include <nds32_intrinsic.h> +- +-void +-test (void) +-{ +- int itype_value; +- +- itype_value = __builtin_nds32_mfusr (__NDS32_REG_ITYPE__); +- __builtin_nds32_mtusr (itype_value, __NDS32_REG_ITYPE__); +-} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c 2013-12-03 11:58:05.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c 1970-01-01 01:00:00.000000000 +0100 +@@ -1,11 +0,0 @@ +-/* Verify that we generate setgie.d instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tsetgie.d" } } */ +- +-void +-test (void) +-{ +- __builtin_nds32_setgie_dis (); +-} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c 2013-12-03 11:58:05.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c 1970-01-01 01:00:00.000000000 +0100 +@@ -1,11 +0,0 @@ +-/* Verify that we generate setgie.e instruction with builtin function. */ +- +-/* { dg-do compile } */ +-/* { dg-options "-O0" } */ +-/* { dg-final { scan-assembler "\\tsetgie.e" } } */ +- +-void +-test (void) +-{ +- __builtin_nds32_setgie_en (); +-} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,36 @@ ++/* This is a test program for checking gie with ++ mtsr/mfsr instruction. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-O0" } */ ++ ++#include <nds32_intrinsic.h> ++#include <stdlib.h> ++ ++int ++main () ++{ ++ unsigned int psw; ++ unsigned int gie; ++ unsigned int pfm_ctl; ++ ++ __nds32__setgie_en (); ++ __nds32__dsb(); /* This is needed for waiting pipeline. */ ++ psw = __nds32__mfsr (NDS32_SR_PSW); ++ ++ gie = psw & 0x00000001; ++ ++ if (gie != 1) ++ abort (); ++ ++ psw = psw & 0xFFFFFFFE; ++ __nds32__mtsr (psw,NDS32_SR_PSW); ++ __nds32__dsb(); /* This is needed for waiting pipeline. */ ++ psw = __nds32__mfsr (NDS32_SR_PSW); ++ gie = psw & 0x00000001; ++ ++ if (gie != 0) ++ abort (); ++ else ++ exit (0); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__clr_pending_swint (); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__clr_pending_hwint (NDS32_INT_H0); ++ __nds32__clr_pending_hwint (NDS32_INT_H1); ++ __nds32__clr_pending_hwint (NDS32_INT_H2); ++ ++ __nds32__clr_pending_hwint (NDS32_INT_H15); ++ __nds32__clr_pending_hwint (NDS32_INT_H16); ++ __nds32__clr_pending_hwint (NDS32_INT_H31); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__disable_int (NDS32_INT_H15); ++ __nds32__disable_int (NDS32_INT_H16); ++ __nds32__disable_int (NDS32_INT_H31); ++ __nds32__disable_int (NDS32_INT_SWI); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__enable_int (NDS32_INT_H15); ++ __nds32__enable_int (NDS32_INT_H16); ++ __nds32__enable_int (NDS32_INT_H31); ++ __nds32__enable_int (NDS32_INT_SWI); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ int a = __nds32__get_pending_int (NDS32_INT_H15); ++ int b = __nds32__get_pending_int (NDS32_INT_SWI); ++ int c = __nds32__get_pending_int (NDS32_INT_H16); ++ ++ return a + b + c; ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ int a = __nds32__get_trig_type (NDS32_INT_H0); ++ int b = __nds32__get_trig_type (NDS32_INT_H15); ++ int c = __nds32__get_trig_type (NDS32_INT_H16); ++ int d = __nds32__get_trig_type (NDS32_INT_H31); ++ return a + b + c + d; ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c 2019-01-25 15:38:32.837242683 +0100 +@@ -0,0 +1,11 @@ ++/* Verify that we generate isb instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tisb" } } */ ++ ++void ++test (void) ++{ ++ __builtin_nds32_isb (); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,12 @@ ++/* Verify that we generate isync instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tisync" } } */ ++ ++void ++test (void) ++{ ++ int *addr = (int *) 0x53000000; ++ __builtin_nds32_isync (addr); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,17 @@ ++/* Verify that we generate mfsr/mtsr instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tmfsr" } } */ ++/* { dg-final { scan-assembler "\\tmtsr" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int ipsw_value; ++ ++ ipsw_value = __builtin_nds32_mfsr (__NDS32_REG_IPSW__); ++ __builtin_nds32_mtsr (ipsw_value, __NDS32_REG_IPSW__); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,17 @@ ++/* Verify that we generate mfusr/mtusr instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tmfusr" } } */ ++/* { dg-final { scan-assembler "\\tmtusr" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ int itype_value; ++ ++ itype_value = __builtin_nds32_mfusr (__NDS32_REG_ITYPE__); ++ __builtin_nds32_mtusr (itype_value, __NDS32_REG_ITYPE__); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,13 @@ ++/* Verify that we generate setgie.d instruction with builtin function. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tsetgie.d" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ __nds32__setgie_dis (); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,13 @@ ++/* Verify that we generate setgie.e instruction with builtin function. */ ++ ++/* { dg-do compile */ ++/* { dg-options "-O0" } */ ++/* { dg-final { scan-assembler "\\tsetgie.e" } } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++test (void) ++{ ++ __nds32__setgie_en (); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++int ++main (void) ++{ ++ __nds32__set_pending_swint (); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__set_trig_type_edge (NDS32_INT_H0); ++ __nds32__set_trig_type_edge (NDS32_INT_H15); ++ __nds32__set_trig_type_edge (NDS32_INT_H16); ++ __nds32__set_trig_type_edge (NDS32_INT_H31); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1" } */ ++ ++#include <nds32_intrinsic.h> ++ ++void ++main (void) ++{ ++ __nds32__set_trig_type_level (NDS32_INT_H0); ++ __nds32__set_trig_type_level (NDS32_INT_H15); ++ __nds32__set_trig_type_level (NDS32_INT_H16); ++ __nds32__set_trig_type_level (NDS32_INT_H31); ++} +diff -urN gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/nds32.exp gcc-8.2.0/gcc/testsuite/gcc.target/nds32/nds32.exp +--- gcc-8.2.0.orig/gcc/testsuite/gcc.target/nds32/nds32.exp 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/gcc/testsuite/gcc.target/nds32/nds32.exp 2019-01-25 15:38:32.841242694 +0100 +@@ -38,8 +38,10 @@ + dg-init + + # Main loop. +-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/compile/*.\[cS\]]] \ + "" $DEFAULT_CFLAGS ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ ++ "" "" + + # All done. + dg-finish +diff -urN gcc-8.2.0.orig/gcc/testsuite/lib/target-supports.exp gcc-8.2.0/gcc/testsuite/lib/target-supports.exp +--- gcc-8.2.0.orig/gcc/testsuite/lib/target-supports.exp 2018-06-29 00:23:51.000000000 +0200 ++++ gcc-8.2.0/gcc/testsuite/lib/target-supports.exp 2019-01-25 15:38:32.841242694 +0100 +@@ -8783,6 +8783,7 @@ + || [istarget avr*-*-*] + || [istarget crisv32-*-*] || [istarget cris-*-*] + || [istarget mmix-*-*] ++ || [istarget nds32*-*-*] + || [istarget s390*-*-*] + || [istarget powerpc*-*-*] + || [istarget nios2*-*-*] +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/initfini.c gcc-8.2.0/libgcc/config/nds32/initfini.c +--- gcc-8.2.0.orig/libgcc/config/nds32/initfini.c 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/initfini.c 2019-01-25 15:38:32.841242694 +0100 +@@ -25,6 +25,10 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + ++#include <stddef.h> ++/* Need header file for `struct object' type. */ ++#include "../libgcc/unwind-dw2-fde.h" ++ + /* Declare a pointer to void function type. */ + typedef void (*func_ptr) (void); + +@@ -42,11 +46,59 @@ + refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__ + symbol in crtinit.o, where they are defined. */ + +-static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"))) +- = { (func_ptr) (-1) }; ++static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"), used)) ++ = { (func_ptr) 0 }; ++ ++static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"), used)) ++ = { (func_ptr) 0 }; ++ ++ ++#ifdef SUPPORT_UNWINDING_DWARF2 ++/* Preparation of exception handling with dwar2 mechanism registration. */ ++ ++asm ("\n\ ++ .section .eh_frame,\"aw\",@progbits\n\ ++ .global __EH_FRAME_BEGIN__\n\ ++ .type __EH_FRAME_BEGIN__, @object\n\ ++ .align 2\n\ ++__EH_FRAME_BEGIN__:\n\ ++ ! Beginning location of eh_frame section\n\ ++ .previous\n\ ++"); ++ ++extern func_ptr __EH_FRAME_BEGIN__[]; ++ + +-static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) +- = { (func_ptr) (-1) }; ++/* Note that the following two functions are going to be chained into ++ constructor and destructor list, repectively. So these two declarations ++ must be placed after __CTOR_LIST__ and __DTOR_LIST. */ ++extern void __nds32_register_eh(void) __attribute__((constructor, used)); ++extern void __nds32_deregister_eh(void) __attribute__((destructor, used)); ++ ++/* Register the exception handling table as the first constructor. */ ++void ++__nds32_register_eh (void) ++{ ++ static struct object object; ++ if (__register_frame_info) ++ __register_frame_info (__EH_FRAME_BEGIN__, &object); ++} ++ ++/* Unregister the exception handling table as a deconstructor. */ ++void ++__nds32_deregister_eh (void) ++{ ++ static int completed = 0; ++ ++ if (completed) ++ return; ++ ++ if (__deregister_frame_info) ++ __deregister_frame_info (__EH_FRAME_BEGIN__); ++ ++ completed = 1; ++} ++#endif + + /* Run all the global destructors on exit from the program. */ + +@@ -63,7 +115,7 @@ + same particular root executable or shared library file. */ + + static void __do_global_dtors (void) +-asm ("__do_global_dtors") __attribute__ ((section (".text"))); ++asm ("__do_global_dtors") __attribute__ ((section (".text"), used)); + + static void + __do_global_dtors (void) +@@ -116,23 +168,37 @@ + last, these words naturally end up at the very ends of the two lists + contained in these two sections. */ + +-static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"))) ++static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"), used)) + = { (func_ptr) 0 }; + +-static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"))) ++static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"), used)) + = { (func_ptr) 0 }; + ++#ifdef SUPPORT_UNWINDING_DWARF2 ++/* ZERO terminator in .eh_frame section. */ ++asm ("\n\ ++ .section .eh_frame,\"aw\",@progbits\n\ ++ .global __EH_FRAME_END__\n\ ++ .type __EH_FRAME_END__, @object\n\ ++ .align 2\n\ ++__EH_FRAME_END__:\n\ ++ ! End location of eh_frame section with ZERO terminator\n\ ++ .word 0\n\ ++ .previous\n\ ++"); ++#endif ++ + /* Run all global constructors for the program. + Note that they are run in reverse order. */ + + static void __do_global_ctors (void) +-asm ("__do_global_ctors") __attribute__ ((section (".text"))); ++asm ("__do_global_ctors") __attribute__ ((section (".text"), used)); + + static void + __do_global_ctors (void) + { + func_ptr *p; +- for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--) ++ for (p = __CTOR_END__ - 1; *p; p--) + (*p) (); + } + +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/adj_intr_lvl.inc gcc-8.2.0/libgcc/config/nds32/isr-library/adj_intr_lvl.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/adj_intr_lvl.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/adj_intr_lvl.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -26,13 +26,26 @@ + .macro ADJ_INTR_LVL + #if defined(NDS32_NESTED) /* Nested handler. */ + mfsr $r3, $PSW ++ /* By substracting 1 from $PSW, we can lower PSW.INTL ++ and enable GIE simultaneously. */ + addi $r3, $r3, #-0x1 ++ #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ ++ #endif + mtsr $r3, $PSW + #elif defined(NDS32_NESTED_READY) /* Nested ready handler. */ + /* Save ipc and ipsw and lower INT level. */ + mfsr $r3, $PSW + addi $r3, $r3, #-0x2 ++ #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ ++ #endif + mtsr $r3, $PSW + #else /* Not nested handler. */ ++ #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ mfsr $r3, $PSW ++ ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ ++ mtsr $r3, $PSW ++ #endif + #endif + .endm +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/excp_isr.S gcc-8.2.0/libgcc/config/nds32/isr-library/excp_isr.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/excp_isr.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/excp_isr.S 2019-01-25 15:38:32.841242694 +0100 +@@ -23,6 +23,7 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + ++#include "save_usr_regs.inc" + #include "save_mac_regs.inc" + #include "save_fpu_regs.inc" + #include "save_fpu_regs_00.inc" +@@ -32,35 +33,33 @@ + #include "save_all.inc" + #include "save_partial.inc" + #include "adj_intr_lvl.inc" +-#include "restore_mac_regs.inc" + #include "restore_fpu_regs_00.inc" + #include "restore_fpu_regs_01.inc" + #include "restore_fpu_regs_02.inc" + #include "restore_fpu_regs_03.inc" + #include "restore_fpu_regs.inc" ++#include "restore_mac_regs.inc" ++#include "restore_usr_regs.inc" + #include "restore_all.inc" + #include "restore_partial.inc" ++ + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ + .align 1 +-/* +- First Level Handlers +- 1. First Level Handlers are invokded in vector section via jump instruction +- with specific names for different configurations. +- 2. Naming Format: _nds32_e_SR_NT for exception handlers. +- _nds32_i_SR_NT for interrupt handlers. +- 2.1 All upper case letters are replaced with specific lower case letters encodings. +- 2.2 SR: Saved Registers +- sa: Save All regs (context) +- ps: Partial Save (all caller-saved regs) +- 2.3 NT: Nested Type +- ns: nested +- nn: not nested +- nr: nested ready +-*/ +- +-/* +- This is original 16-byte vector size version. +-*/ ++ ++/* First Level Handlers ++ 1. First Level Handlers are invokded in vector section via jump instruction ++ with specific names for different configurations. ++ 2. Naming Format: _nds32_e_SR_NT for exception handlers. ++ _nds32_i_SR_NT for interrupt handlers. ++ 2.1 All upper case letters are replaced with specific lower case letters encodings. ++ 2.2 SR -- Saved Registers ++ sa: Save All regs (context) ++ ps: Partial Save (all caller-saved regs) ++ 2.3 NT -- Nested Type ++ ns: nested ++ nn: not nested ++ nr: nested ready */ ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .globl _nds32_e_sa_ns +@@ -91,21 +90,26 @@ + #endif /* endif for Nest Type */ + #endif /* not NDS32_SAVE_ALL_REGS */ + +-/* +- This is 16-byte vector size version. +- The vector id was restored into $r0 in vector by compiler. +-*/ ++ ++/* For 4-byte vector size version, the vector id is ++ extracted from $ITYPE and is set into $r0 by library. ++ For 16-byte vector size version, the vector id ++ is set into $r0 in vector section by compiler. */ ++ ++/* Save used registers. */ + #ifdef NDS32_SAVE_ALL_REGS + SAVE_ALL + #else + SAVE_PARTIAL + #endif ++ + /* Prepare to call 2nd level handler. */ + la $r2, _nds32_jmptbl_00 + lw $r2, [$r2 + $r0 << #2] + ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ + jral $r2 +- /* Restore used registers. */ ++ ++/* Restore used registers. */ + #ifdef NDS32_SAVE_ALL_REGS + RESTORE_ALL + #else +@@ -113,6 +117,7 @@ + #endif + iret + ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .size _nds32_e_sa_ns, .-_nds32_e_sa_ns +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/intr_isr.S gcc-8.2.0/libgcc/config/nds32/isr-library/intr_isr.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/intr_isr.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/intr_isr.S 2019-01-25 15:38:32.841242694 +0100 +@@ -23,6 +23,7 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + ++#include "save_usr_regs.inc" + #include "save_mac_regs.inc" + #include "save_fpu_regs.inc" + #include "save_fpu_regs_00.inc" +@@ -32,35 +33,33 @@ + #include "save_all.inc" + #include "save_partial.inc" + #include "adj_intr_lvl.inc" +-#include "restore_mac_regs.inc" + #include "restore_fpu_regs_00.inc" + #include "restore_fpu_regs_01.inc" + #include "restore_fpu_regs_02.inc" + #include "restore_fpu_regs_03.inc" + #include "restore_fpu_regs.inc" ++#include "restore_mac_regs.inc" ++#include "restore_usr_regs.inc" + #include "restore_all.inc" + #include "restore_partial.inc" ++ + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ + .align 1 +-/* +- First Level Handlers +- 1. First Level Handlers are invokded in vector section via jump instruction +- with specific names for different configurations. +- 2. Naming Format: _nds32_e_SR_NT for exception handlers. +- _nds32_i_SR_NT for interrupt handlers. +- 2.1 All upper case letters are replaced with specific lower case letters encodings. +- 2.2 SR: Saved Registers +- sa: Save All regs (context) +- ps: Partial Save (all caller-saved regs) +- 2.3 NT: Nested Type +- ns: nested +- nn: not nested +- nr: nested ready +-*/ +- +-/* +- This is original 16-byte vector size version. +-*/ ++ ++/* First Level Handlers ++ 1. First Level Handlers are invokded in vector section via jump instruction ++ with specific names for different configurations. ++ 2. Naming Format: _nds32_e_SR_NT for exception handlers. ++ _nds32_i_SR_NT for interrupt handlers. ++ 2.1 All upper case letters are replaced with specific lower case letters encodings. ++ 2.2 SR -- Saved Registers ++ sa: Save All regs (context) ++ ps: Partial Save (all caller-saved regs) ++ 2.3 NT -- Nested Type ++ ns: nested ++ nn: not nested ++ nr: nested ready */ ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .globl _nds32_i_sa_ns +@@ -91,21 +90,36 @@ + #endif /* endif for Nest Type */ + #endif /* not NDS32_SAVE_ALL_REGS */ + +-/* +- This is 16-byte vector size version. +- The vector id was restored into $r0 in vector by compiler. +-*/ ++ ++/* For 4-byte vector size version, the vector id is ++ extracted from $ITYPE and is set into $r0 by library. ++ For 16-byte vector size version, the vector id ++ is set into $r0 in vector section by compiler. */ ++ ++/* Save used registers first. */ + #ifdef NDS32_SAVE_ALL_REGS + SAVE_ALL + #else + SAVE_PARTIAL + #endif +- /* Prepare to call 2nd level handler. */ ++ ++/* According to vector size, we need to have different implementation. */ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* Prepare to call 2nd level handler. */ ++ la $r2, _nds32_jmptbl_00 ++ lw $r2, [$r2 + $r0 << #2] ++ addi $r0, $r0, #-9 /* Make interrput vector id zero-based. */ ++ ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ ++ jral $r2 ++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ /* Prepare to call 2nd level handler. */ + la $r2, _nds32_jmptbl_09 /* For zero-based vcetor id. */ + lw $r2, [$r2 + $r0 << #2] + ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ + jral $r2 +- /* Restore used registers. */ ++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ ++/* Restore used registers. */ + #ifdef NDS32_SAVE_ALL_REGS + RESTORE_ALL + #else +@@ -113,6 +127,7 @@ + #endif + iret + ++ + #ifdef NDS32_SAVE_ALL_REGS + #if defined(NDS32_NESTED) + .size _nds32_i_sa_ns, .-_nds32_i_sa_ns +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/reset.S gcc-8.2.0/libgcc/config/nds32/isr-library/reset.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/reset.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/reset.S 2019-01-25 15:38:32.841242694 +0100 +@@ -26,22 +26,18 @@ + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ + .align 1 + .weak _SDA_BASE_ /* For reset handler only. */ +- .weak _FP_BASE_ /* For reset handler only. */ + .weak _nds32_init_mem /* User defined memory initialization function. */ + .globl _start + .globl _nds32_reset + .type _nds32_reset, @function + _nds32_reset: + _start: +-#ifdef NDS32_EXT_EX9 +- .no_ex9_begin +-#endif + /* Handle NMI and warm boot if any of them exists. */ + beqz $sp, 1f /* Reset, NMI or warm boot? */ + /* Either NMI or warm boot; save all regs. */ + + /* Preserve registers for context-switching. */ +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + /* For 16-reg mode. */ + smw.adm $r0, [$sp], $r10, #0x0 + smw.adm $r15, [$sp], $r15, #0xf +@@ -49,10 +45,9 @@ + /* For 32-reg mode. */ + smw.adm $r0, [$sp], $r27, #0xf + #endif +-#ifdef NDS32_EXT_IFC ++#if __NDS32_EXT_IFC__ + mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ ++ smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte alignment. */ + #endif + + la $gp, _SDA_BASE_ /* Init GP for small data access. */ +@@ -71,12 +66,11 @@ + bnez $r0, 1f /* If fail to resume, do cold boot. */ + + /* Restore registers for context-switching. */ +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. */ ++#if __NDS32_EXT_IFC__ ++ lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep stack 8-byte alignment. */ + mtusr $r1, $IFC_LP + #endif +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + /* For 16-reg mode. */ + lmw.bim $r15, [$sp], $r15, #0xf + lmw.bim $r0, [$sp], $r10, #0x0 +@@ -88,6 +82,17 @@ + + + 1: /* Cold boot. */ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* With vector ID feature for v3 architecture, default vector size is 4-byte. */ ++ /* Set IVB.ESZ = 0 (vector table entry size = 4 bytes) */ ++ mfsr $r0, $IVB ++ li $r1, #0xc000 ++ or $r0, $r0, $r1 ++ xor $r0, $r0, $r1 ++ mtsr $r0, $IVB ++ dsb ++#else ++ /* There is no vector ID feature, so the vector size must be 16-byte. */ + /* Set IVB.ESZ = 1 (vector table entry size = 16 bytes) */ + mfsr $r0, $IVB + li $r1, #0xffff3fff +@@ -95,36 +100,54 @@ + ori $r0, $r0, #0x4000 + mtsr $r0, $IVB + dsb ++#endif + + la $gp, _SDA_BASE_ /* Init $gp. */ +- la $fp, _FP_BASE_ /* Init $fp. */ + la $sp, _stack /* Init $sp. */ +-#ifdef NDS32_EXT_EX9 +-/* +- * Initialize the table base of EX9 instruction +- * ex9 generation needs to disable before the ITB is set +- */ +- mfsr $r0, $MSC_CFG /* Check if HW support of EX9. */ ++ ++#if __NDS32_EXT_EX9__ ++.L_init_itb: ++ /* Initialization for Instruction Table Base (ITB). ++ The symbol _ITB_BASE_ is determined by Linker. ++ Set $ITB only if MSC_CFG.EIT (cr4.b'24) is set. */ ++ mfsr $r0, $MSC_CFG + srli $r0, $r0, 24 + andi $r0, $r0, 0x1 +- beqz $r0, 4f /* Zero means HW does not support EX9. */ +- la $r0, _ITB_BASE_ /* Init $ITB. */ ++ beqz $r0, 4f /* Fall through ? */ ++ la $r0, _ITB_BASE_ + mtusr $r0, $ITB +- .no_ex9_end + 4: + #endif +- la $r15, _nds32_init_mem /* Call DRAM init. _nds32_init_mem +- may written by C language. */ ++ ++#if __NDS32_EXT_FPU_SP__ || __NDS32_EXT_FPU_DP__ ++.L_init_fpu: ++ /* Initialize FPU ++ Set FUCOP_CTL.CP0EN (fucpr.b'0). */ ++ mfsr $r0, $FUCOP_CTL ++ ori $r0, $r0, 0x1 ++ mtsr $r0, $FUCOP_CTL ++ dsb ++ /* According to [bugzilla #9425], set flush-to-zero mode. ++ That is, set $FPCSR.DNZ(b'12) = 1. */ ++ FMFCSR $r0 ++ ori $r0, $r0, 0x1000 ++ FMTCSR $r0 ++ dsb ++#endif ++ ++ /* Call DRAM init. _nds32_init_mem may written by C language. */ ++ la $r15, _nds32_init_mem + beqz $r15, 6f + jral $r15 + 6: + l.w $r15, _nds32_jmptbl_00 /* Load reset handler. */ + jral $r15 +-/* Reset handler() should never return in a RTOS or non-OS system. +- In case it does return, an exception will be generated. +- This exception will be caught either by default break handler or by EDM. +- Default break handle may just do an infinite loop. +- EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ ++ ++ /* Reset handler() should never return in a RTOS or non-OS system. ++ In case it does return, an exception will be generated. ++ This exception will be caught either by default break handler or by EDM. ++ Default break handle may just do an infinite loop. ++ EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ + 5: + break #0x7fff + .size _nds32_reset, .-_nds32_reset +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_all.inc gcc-8.2.0/libgcc/config/nds32/isr-library/restore_all.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_all.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/restore_all.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -31,15 +31,11 @@ + mtsr $r2, $IPSW + RESTORE_FPU_REGS + RESTORE_MAC_REGS +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. */ +- mtusr $r1, $IFC_LP +-#endif +-#ifdef __NDS32_REDUCED_REGS__ ++ RESTORE_USR_REGS ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + lmw.bim $r0, [$sp], $r10, #0x0 /* Restore all regs. */ + lmw.bim $r15, [$sp], $r15, #0xf +-#else /* not __NDS32_REDUCED_REGS__ */ ++#else + lmw.bim $r0, [$sp], $r27, #0xf /* Restore all regs. */ + #endif + .endm +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_mac_regs.inc gcc-8.2.0/libgcc/config/nds32/isr-library/restore_mac_regs.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_mac_regs.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/restore_mac_regs.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -24,7 +24,7 @@ + <http://www.gnu.org/licenses/>. */ + + .macro RESTORE_MAC_REGS +-#ifdef NDS32_DX_REGS ++#if __NDS32_DX_REGS__ + lmw.bim $r1, [$sp], $r4, #0x0 + mtusr $r1, $d0.lo + mtusr $r2, $d0.hi +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_partial.inc gcc-8.2.0/libgcc/config/nds32/isr-library/restore_partial.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_partial.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/restore_partial.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -31,15 +31,11 @@ + mtsr $r1, $IPC /* Set IPC. */ + mtsr $r2, $IPSW /* Set IPSW. */ + #endif +- RESTORE_FPU_REGS +- RESTORE_MAC_REGS +-#ifdef NDS32_EXT_IFC +- lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep +- stack 8-byte alignment. */ +- mtusr $r1, $IFC_LP +-#endif ++ RESTORE_FPU_REGS ++ RESTORE_MAC_REGS ++ RESTORE_USR_REGS + lmw.bim $r0, [$sp], $r5, #0x0 /* Restore all regs. */ +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + lmw.bim $r15, [$sp], $r15, #0x2 + #else + lmw.bim $r15, [$sp], $r27, #0x2 /* Restore all regs. */ +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_usr_regs.inc gcc-8.2.0/libgcc/config/nds32/isr-library/restore_usr_regs.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/restore_usr_regs.inc 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/restore_usr_regs.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,42 @@ ++/* c-isr library stuff of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2018 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++.macro RESTORE_USR_REGS ++#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) ++ lmw.bim $r1, [$sp], $r4, #0x0 ++ mtusr $r1, $IFC_LP ++ mtusr $r2, $LB ++ mtusr $r3, $LE ++ mtusr $r4, $LC ++#elif __NDS32_EXT_IFC__ ++ lmw.bim $r1, [$sp], $r2, #0x0 ++ mtusr $r1, $IFC_LP ++#elif __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ ++ lmw.bim $r1, [$sp], $r4, #0x0 ++ mtusr $r1, $LB ++ mtusr $r2, $LE ++ mtusr $r3, $LC ++#endif ++.endm +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_all.inc gcc-8.2.0/libgcc/config/nds32/isr-library/save_all.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_all.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/save_all.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -23,45 +23,42 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +-.macro SAVE_ALL_4B +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ ++/* If vector size is 4-byte, we have to save registers ++ in the macro implementation. */ ++.macro SAVE_ALL ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + smw.adm $r15, [$sp], $r15, #0xf + smw.adm $r0, [$sp], $r10, #0x0 +-#else /* not __NDS32_REDUCED_REGS__ */ ++#else + smw.adm $r0, [$sp], $r27, #0xf +-#endif /* not __NDS32_REDUCED_REGS__ */ +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ + #endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ + move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ + mfsr $r0, $ITYPE /* Get VID to $r0. */ + srli $r0, $r0, #5 +-#ifdef __NDS32_ISA_V2__ + andi $r0, $r0, #127 +-#else +- fexti33 $r0, #6 +-#endif + .endm + ++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ ++/* If vector size is 16-byte, some works can be done in ++ the vector section generated by compiler, so that we ++ can implement less in the macro. */ + .macro SAVE_ALL +-/* SAVE_REG_TBL code has been moved to +- vector table generated by compiler. */ +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ +-#endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ + move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ + .endm ++ ++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_mac_regs.inc gcc-8.2.0/libgcc/config/nds32/isr-library/save_mac_regs.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_mac_regs.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/save_mac_regs.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -24,7 +24,7 @@ + <http://www.gnu.org/licenses/>. */ + + .macro SAVE_MAC_REGS +-#ifdef NDS32_DX_REGS ++#if __NDS32_DX_REGS__ + mfusr $r1, $d0.lo + mfusr $r2, $d0.hi + mfusr $r3, $d1.lo +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_partial.inc gcc-8.2.0/libgcc/config/nds32/isr-library/save_partial.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_partial.inc 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/save_partial.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -23,20 +23,20 @@ + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +-.macro SAVE_PARTIAL_4B +-#ifdef __NDS32_REDUCED_REGS__ ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ ++/* If vector size is 4-byte, we have to save registers ++ in the macro implementation. */ ++.macro SAVE_PARTIAL ++#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + smw.adm $r15, [$sp], $r15, #0x2 +-#else /* not __NDS32_REDUCED_REGS__ */ ++#else + smw.adm $r15, [$sp], $r27, #0x2 +-#endif /* not __NDS32_REDUCED_REGS__ */ +- smw.adm $r0, [$sp], $r5, #0x0 +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ + #endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ smw.adm $r0, [$sp], $r5, #0x0 ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ +@@ -44,26 +44,24 @@ + #endif + mfsr $r0, $ITYPE /* Get VID to $r0. */ + srli $r0, $r0, #5 +-#ifdef __NDS32_ISA_V2__ + andi $r0, $r0, #127 +-#else +- fexti33 $r0, #6 +-#endif + .endm + ++#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ ++ ++/* If vector size is 16-byte, some works can be done in ++ the vector section generated by compiler, so that we ++ can implement less in the macro. */ ++ + .macro SAVE_PARTIAL +-/* SAVE_CALLER_REGS code has been moved to +- vector table generated by compiler. */ +-#ifdef NDS32_EXT_IFC +- mfusr $r1, $IFC_LP +- smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep +- stack 8-byte alignment. */ +-#endif +- SAVE_MAC_REGS +- SAVE_FPU_REGS ++ SAVE_USR_REGS ++ SAVE_MAC_REGS ++ SAVE_FPU_REGS + #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) + mfsr $r1, $IPC /* Get IPC. */ + mfsr $r2, $IPSW /* Get IPSW. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ + #endif + .endm ++ ++#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_usr_regs.inc gcc-8.2.0/libgcc/config/nds32/isr-library/save_usr_regs.inc +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/save_usr_regs.inc 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/save_usr_regs.inc 2019-01-25 15:38:32.841242694 +0100 +@@ -0,0 +1,44 @@ ++/* c-isr library stuff of Andes NDS32 cpu for GNU compiler ++ Copyright (C) 2012-2018 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++.macro SAVE_USR_REGS ++/* Store User Special Registers according to supported ISA extension ++ !!! WATCH OUT !!! Take care of 8-byte alignment issue. */ ++#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) ++ mfusr $r1, $IFC_LP ++ mfusr $r2, $LB ++ mfusr $r3, $LE ++ mfusr $r4, $LC ++ smw.adm $r1, [$sp], $r4, #0x0 /* Save even. Ok! */ ++#elif __NDS32_EXT_IFC__ ++ mfusr $r1, $IFC_LP ++ smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte aligned. */ ++#elif (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) ++ mfusr $r1, $LB ++ mfusr $r2, $LE ++ mfusr $r3, $LC ++ smw.adm $r1, [$sp], $r4, #0x0 /* Save extra $r4 to keep stack 8-byte aligned. */ ++#endif ++.endm +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid00.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid00.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid00.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid00.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.00, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_00 + .type _nds32_vector_00, @function + _nds32_vector_00: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid01.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid01.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid01.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid01.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.01, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_01 + .type _nds32_vector_01, @function + _nds32_vector_01: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid02.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid02.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid02.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid02.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.02, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_02 + .type _nds32_vector_02, @function + _nds32_vector_02: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid03.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid03.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid03.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid03.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.03, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_03 + .type _nds32_vector_03, @function + _nds32_vector_03: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid04.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid04.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid04.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid04.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.04, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_04 + .type _nds32_vector_04, @function + _nds32_vector_04: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid05.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid05.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid05.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid05.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.05, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_05 + .type _nds32_vector_05, @function + _nds32_vector_05: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid06.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid06.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid06.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid06.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.06, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_06 + .type _nds32_vector_06, @function + _nds32_vector_06: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid07.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid07.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid07.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid07.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.07, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_07 + .type _nds32_vector_07, @function + _nds32_vector_07: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid08.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid08.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid08.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid08.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.08, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_08 + .type _nds32_vector_08, @function + _nds32_vector_08: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid09.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid09.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid09.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid09.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.09, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_09 + .type _nds32_vector_09, @function + _nds32_vector_09: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid10.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid10.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid10.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid10.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.10, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_10 + .type _nds32_vector_10, @function + _nds32_vector_10: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid11.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid11.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid11.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid11.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.11, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_11 + .type _nds32_vector_11, @function + _nds32_vector_11: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid12.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid12.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid12.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid12.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.12, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_12 + .type _nds32_vector_12, @function + _nds32_vector_12: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid13.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid13.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid13.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid13.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.13, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_13 + .type _nds32_vector_13, @function + _nds32_vector_13: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid14.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid14.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid14.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid14.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.14, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_14 + .type _nds32_vector_14, @function + _nds32_vector_14: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid15.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid15.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid15.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid15.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.15, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_15 + .type _nds32_vector_15, @function + _nds32_vector_15: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid16.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid16.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid16.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid16.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.16, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_16 + .type _nds32_vector_16, @function + _nds32_vector_16: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid17.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid17.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid17.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid17.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.17, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_17 + .type _nds32_vector_17, @function + _nds32_vector_17: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid18.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid18.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid18.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid18.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.18, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_18 + .type _nds32_vector_18, @function + _nds32_vector_18: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid19.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid19.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid19.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid19.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.19, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_19 + .type _nds32_vector_19, @function + _nds32_vector_19: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid20.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid20.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid20.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid20.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.20, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_20 + .type _nds32_vector_20, @function + _nds32_vector_20: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid21.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid21.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid21.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid21.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.21, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_21 + .type _nds32_vector_21, @function + _nds32_vector_21: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid22.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid22.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid22.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid22.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.22, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_22 + .type _nds32_vector_22, @function + _nds32_vector_22: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid23.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid23.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid23.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid23.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.23, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_23 + .type _nds32_vector_23, @function + _nds32_vector_23: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid24.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid24.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid24.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid24.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.24, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_24 + .type _nds32_vector_24, @function + _nds32_vector_24: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid25.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid25.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid25.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid25.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.25, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_25 + .type _nds32_vector_25, @function + _nds32_vector_25: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid26.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid26.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid26.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid26.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.26, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_26 + .type _nds32_vector_26, @function + _nds32_vector_26: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid27.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid27.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid27.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid27.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.27, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_27 + .type _nds32_vector_27, @function + _nds32_vector_27: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid28.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid28.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid28.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid28.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.28, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_28 + .type _nds32_vector_28, @function + _nds32_vector_28: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid29.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid29.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid29.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid29.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.29, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_29 + .type _nds32_vector_29, @function + _nds32_vector_29: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid30.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid30.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid30.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid30.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.30, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_30 + .type _nds32_vector_30, @function + _nds32_vector_30: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid31.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid31.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid31.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid31.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.31, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_31 + .type _nds32_vector_31, @function + _nds32_vector_31: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid32.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid32.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid32.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid32.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.32, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_32 + .type _nds32_vector_32, @function + _nds32_vector_32: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid33.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid33.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid33.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid33.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.33, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_33 + .type _nds32_vector_33, @function + _nds32_vector_33: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid34.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid34.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid34.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid34.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.34, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_34 + .type _nds32_vector_34, @function + _nds32_vector_34: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid35.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid35.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid35.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid35.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.35, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_35 + .type _nds32_vector_35, @function + _nds32_vector_35: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid36.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid36.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid36.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid36.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.36, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_36 + .type _nds32_vector_36, @function + _nds32_vector_36: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid37.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid37.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid37.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid37.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.37, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_37 + .type _nds32_vector_37, @function + _nds32_vector_37: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid38.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid38.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid38.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid38.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.38, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_38 + .type _nds32_vector_38, @function + _nds32_vector_38: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid39.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid39.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid39.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid39.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.39, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_39 + .type _nds32_vector_39, @function + _nds32_vector_39: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid40.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid40.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid40.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid40.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.40, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_40 + .type _nds32_vector_40, @function + _nds32_vector_40: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid41.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid41.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid41.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid41.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.41, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_41 + .type _nds32_vector_41, @function + _nds32_vector_41: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid42.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid42.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid42.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid42.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.42, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_42 + .type _nds32_vector_42, @function + _nds32_vector_42: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid43.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid43.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid43.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid43.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.43, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_43 + .type _nds32_vector_43, @function + _nds32_vector_43: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid44.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid44.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid44.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid44.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.44, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_44 + .type _nds32_vector_44, @function + _nds32_vector_44: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid45.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid45.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid45.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid45.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.45, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_45 + .type _nds32_vector_45, @function + _nds32_vector_45: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid46.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid46.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid46.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid46.S 2019-01-25 15:38:32.841242694 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.46, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_46 + .type _nds32_vector_46, @function + _nds32_vector_46: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid47.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid47.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid47.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid47.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.47, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_47 + .type _nds32_vector_47, @function + _nds32_vector_47: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid48.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid48.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid48.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid48.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.48, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_48 + .type _nds32_vector_48, @function + _nds32_vector_48: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid49.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid49.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid49.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid49.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.49, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_49 + .type _nds32_vector_49, @function + _nds32_vector_49: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid50.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid50.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid50.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid50.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.50, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_50 + .type _nds32_vector_50, @function + _nds32_vector_50: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid51.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid51.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid51.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid51.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.51, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_51 + .type _nds32_vector_51, @function + _nds32_vector_51: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid52.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid52.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid52.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid52.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.52, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_52 + .type _nds32_vector_52, @function + _nds32_vector_52: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid53.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid53.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid53.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid53.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.53, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_53 + .type _nds32_vector_53, @function + _nds32_vector_53: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid54.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid54.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid54.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid54.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.54, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_54 + .type _nds32_vector_54, @function + _nds32_vector_54: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid55.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid55.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid55.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid55.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.55, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_55 + .type _nds32_vector_55, @function + _nds32_vector_55: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid56.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid56.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid56.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid56.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.56, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_56 + .type _nds32_vector_56, @function + _nds32_vector_56: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid57.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid57.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid57.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid57.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.57, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_57 + .type _nds32_vector_57, @function + _nds32_vector_57: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid58.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid58.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid58.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid58.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.58, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_58 + .type _nds32_vector_58, @function + _nds32_vector_58: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid59.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid59.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid59.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid59.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.59, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_59 + .type _nds32_vector_59, @function + _nds32_vector_59: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid60.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid60.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid60.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid60.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.60, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_60 + .type _nds32_vector_60, @function + _nds32_vector_60: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid61.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid61.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid61.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid61.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.61, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_61 + .type _nds32_vector_61, @function + _nds32_vector_61: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid62.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid62.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid62.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid62.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.62, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_62 + .type _nds32_vector_62, @function + _nds32_vector_62: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid63.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid63.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid63.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid63.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.63, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_63 + .type _nds32_vector_63, @function + _nds32_vector_63: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid64.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid64.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid64.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid64.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.64, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_64 + .type _nds32_vector_64, @function + _nds32_vector_64: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid65.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid65.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid65.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid65.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.65, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_65 + .type _nds32_vector_65, @function + _nds32_vector_65: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid66.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid66.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid66.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid66.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.66, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_66 + .type _nds32_vector_66, @function + _nds32_vector_66: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid67.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid67.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid67.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid67.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.67, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_67 + .type _nds32_vector_67, @function + _nds32_vector_67: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid68.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid68.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid68.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid68.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.68, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_68 + .type _nds32_vector_68, @function + _nds32_vector_68: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid69.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid69.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid69.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid69.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.69, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_69 + .type _nds32_vector_69, @function + _nds32_vector_69: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid70.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid70.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid70.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid70.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.70, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_70 + .type _nds32_vector_70, @function + _nds32_vector_70: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid71.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid71.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid71.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid71.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.71, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_71 + .type _nds32_vector_71, @function + _nds32_vector_71: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid72.S gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid72.S +--- gcc-8.2.0.orig/libgcc/config/nds32/isr-library/vec_vid72.S 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/isr-library/vec_vid72.S 2019-01-25 15:38:32.845242705 +0100 +@@ -24,8 +24,15 @@ + <http://www.gnu.org/licenses/>. */ + + .section .nds32_vector.72, "ax" ++#if __NDS32_ISR_VECTOR_SIZE_4__ ++ /* The vector size is default 4-byte for v3 architecture. */ ++ .vec_size 4 ++ .align 2 ++#else ++ /* The vector size is default 16-byte for other architectures. */ + .vec_size 16 + .align 4 ++#endif + .weak _nds32_vector_72 + .type _nds32_vector_72, @function + _nds32_vector_72: +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/linux-atomic.c gcc-8.2.0/libgcc/config/nds32/linux-atomic.c +--- gcc-8.2.0.orig/libgcc/config/nds32/linux-atomic.c 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/linux-atomic.c 2019-01-25 15:38:32.845242705 +0100 +@@ -0,0 +1,282 @@ ++/* Linux-specific atomic operations for NDS32 Linux. ++ Copyright (C) 2012-2018 Free Software Foundation, Inc. ++ ++This file is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++This file is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++General Public License for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++<http://www.gnu.org/licenses/>. */ ++ ++/* We implement byte, short and int versions of each atomic operation ++ using the kernel helper defined below. There is no support for ++ 64-bit operations yet. */ ++ ++/* This function copy form NDS32 Linux-kernal. */ ++static inline int ++__kernel_cmpxchg (int oldval, int newval, int *mem) ++{ ++ int temp1, temp2, temp3, offset; ++ ++ asm volatile ("msync\tall\n" ++ "movi\t%0, #0\n" ++ "1:\n" ++ "\tllw\t%1, [%4+%0]\n" ++ "\tsub\t%3, %1, %6\n" ++ "\tcmovz\t%2, %5, %3\n" ++ "\tcmovn\t%2, %1, %3\n" ++ "\tscw\t%2, [%4+%0]\n" ++ "\tbeqz\t%2, 1b\n" ++ : "=&r" (offset), "=&r" (temp3), "=&r" (temp2), "=&r" (temp1) ++ : "r" (mem), "r" (newval), "r" (oldval) : "memory"); ++ ++ return temp1; ++} ++ ++#define HIDDEN __attribute__ ((visibility ("hidden"))) ++ ++#ifdef __NDS32_EL__ ++#define INVERT_MASK_1 0 ++#define INVERT_MASK_2 0 ++#else ++#define INVERT_MASK_1 24 ++#define INVERT_MASK_2 16 ++#endif ++ ++#define MASK_1 0xffu ++#define MASK_2 0xffffu ++ ++#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ ++ int HIDDEN \ ++ __sync_fetch_and_##OP##_4 (int *ptr, int val) \ ++ { \ ++ int failure, tmp; \ ++ \ ++ do { \ ++ tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ ++ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ ++ } while (failure != 0); \ ++ \ ++ return tmp; \ ++ } ++ ++FETCH_AND_OP_WORD (add, , +) ++FETCH_AND_OP_WORD (sub, , -) ++FETCH_AND_OP_WORD (or, , |) ++FETCH_AND_OP_WORD (and, , &) ++FETCH_AND_OP_WORD (xor, , ^) ++FETCH_AND_OP_WORD (nand, ~, &) ++ ++#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH ++#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH ++ ++/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for ++ subword-sized quantities. */ ++ ++#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ ++ TYPE HIDDEN \ ++ NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ ++ { \ ++ int *wordptr = (int *) ((unsigned long) ptr & ~3); \ ++ unsigned int mask, shift, oldval, newval; \ ++ int failure; \ ++ \ ++ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ ++ mask = MASK_##WIDTH << shift; \ ++ \ ++ do { \ ++ oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ ++ newval = ((PFX_OP (((oldval & mask) >> shift) \ ++ INF_OP (unsigned int) val)) << shift) & mask; \ ++ newval |= oldval & ~mask; \ ++ failure = __kernel_cmpxchg (oldval, newval, wordptr); \ ++ } while (failure != 0); \ ++ \ ++ return (RETURN & mask) >> shift; \ ++ } ++ ++ ++SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) ++ ++SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) ++ ++#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ ++ int HIDDEN \ ++ __sync_##OP##_and_fetch_4 (int *ptr, int val) \ ++ { \ ++ int tmp, failure; \ ++ \ ++ do { \ ++ tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ ++ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ ++ } while (failure != 0); \ ++ \ ++ return PFX_OP (tmp INF_OP val); \ ++ } ++ ++OP_AND_FETCH_WORD (add, , +) ++OP_AND_FETCH_WORD (sub, , -) ++OP_AND_FETCH_WORD (or, , |) ++OP_AND_FETCH_WORD (and, , &) ++OP_AND_FETCH_WORD (xor, , ^) ++OP_AND_FETCH_WORD (nand, ~, &) ++ ++SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval) ++ ++SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) ++SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) ++ ++int HIDDEN ++__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) ++{ ++ int actual_oldval, fail; ++ ++ while (1) ++ { ++ actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); ++ ++ if (oldval != actual_oldval) ++ return actual_oldval; ++ ++ fail = __kernel_cmpxchg (actual_oldval, newval, ptr); ++ ++ if (!fail) ++ return oldval; ++ } ++} ++ ++#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ ++ TYPE HIDDEN \ ++ __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ ++ TYPE newval) \ ++ { \ ++ int *wordptr = (int *)((unsigned long) ptr & ~3), fail; \ ++ unsigned int mask, shift, actual_oldval, actual_newval; \ ++ \ ++ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ ++ mask = MASK_##WIDTH << shift; \ ++ \ ++ while (1) \ ++ { \ ++ actual_oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ ++ \ ++ if (((actual_oldval & mask) >> shift) != (unsigned int) oldval) \ ++ return (actual_oldval & mask) >> shift; \ ++ \ ++ actual_newval = (actual_oldval & ~mask) \ ++ | (((unsigned int) newval << shift) & mask); \ ++ \ ++ fail = __kernel_cmpxchg (actual_oldval, actual_newval, \ ++ wordptr); \ ++ \ ++ if (!fail) \ ++ return oldval; \ ++ } \ ++ } ++ ++SUBWORD_VAL_CAS (unsigned short, 2) ++SUBWORD_VAL_CAS (unsigned char, 1) ++ ++typedef unsigned char bool; ++ ++bool HIDDEN ++__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval) ++{ ++ int failure = __kernel_cmpxchg (oldval, newval, ptr); ++ return (failure == 0); ++} ++ ++#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \ ++ bool HIDDEN \ ++ __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ ++ TYPE newval) \ ++ { \ ++ TYPE actual_oldval \ ++ = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ ++ return (oldval == actual_oldval); \ ++ } ++ ++SUBWORD_BOOL_CAS (unsigned short, 2) ++SUBWORD_BOOL_CAS (unsigned char, 1) ++ ++int HIDDEN ++__sync_lock_test_and_set_4 (int *ptr, int val) ++{ ++ int failure, oldval; ++ ++ do { ++ oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); ++ failure = __kernel_cmpxchg (oldval, val, ptr); ++ } while (failure != 0); ++ ++ return oldval; ++} ++ ++#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ ++ TYPE HIDDEN \ ++ __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ ++ { \ ++ int failure; \ ++ unsigned int oldval, newval, shift, mask; \ ++ int *wordptr = (int *) ((unsigned long) ptr & ~3); \ ++ \ ++ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ ++ mask = MASK_##WIDTH << shift; \ ++ \ ++ do { \ ++ oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ ++ newval = (oldval & ~mask) \ ++ | (((unsigned int) val << shift) & mask); \ ++ failure = __kernel_cmpxchg (oldval, newval, wordptr); \ ++ } while (failure != 0); \ ++ \ ++ return (oldval & mask) >> shift; \ ++ } ++ ++SUBWORD_TEST_AND_SET (unsigned short, 2) ++SUBWORD_TEST_AND_SET (unsigned char, 1) ++ ++#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ ++ void HIDDEN \ ++ __sync_lock_release_##WIDTH (TYPE *ptr) \ ++ { \ ++ /* All writes before this point must be seen before we release \ ++ the lock itself. */ \ ++ __builtin_nds32_msync_all (); \ ++ *ptr = 0; \ ++ } ++ ++SYNC_LOCK_RELEASE (int, 4) ++SYNC_LOCK_RELEASE (short, 2) ++SYNC_LOCK_RELEASE (char, 1) +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/linux-unwind.h gcc-8.2.0/libgcc/config/nds32/linux-unwind.h +--- gcc-8.2.0.orig/libgcc/config/nds32/linux-unwind.h 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/linux-unwind.h 2019-01-25 15:38:32.845242705 +0100 +@@ -0,0 +1,143 @@ ++/* DWARF2 EH unwinding support for NDS32 Linux signal frame. ++ Copyright (C) 2014-2015 Free Software Foundation, Inc. ++ Contributed by Andes Technology Corporation. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ <http://www.gnu.org/licenses/>. */ ++ ++#ifndef inhibit_libc ++ ++/* Do code reading to identify a signal frame, and set the frame ++ state data appropriately. See unwind-dw2.c for the structs. ++ The corresponding bits in the Linux kernel are in ++ arch/nds32/kernel/signal.c. */ ++ ++#include <signal.h> ++#include <asm/unistd.h> ++#include <sys/ucontext.h> ++ ++/* Exactly the same layout as the kernel structures, unique names. */ ++ ++/* arch/nds32/kernel/signal.c */ ++struct _rt_sigframe { ++ siginfo_t info; ++ struct ucontext_t uc; ++}; ++ ++#define RT_SIGRETURN 0x8b00f044 ++ ++#define MD_FALLBACK_FRAME_STATE_FOR nds32_fallback_frame_state ++ ++/* This function is supposed to be invoked by uw_frame_state_for() ++ when there is no unwind data available. ++ ++ Generally, given the _Unwind_Context CONTEXT for a stack frame, ++ we need to look up its caller and decode information into FS. ++ However, if the exception handling happens within a signal handler, ++ the return address of signal handler is a special module, which ++ contains signal return syscall and has no FDE in the .eh_frame section. ++ We need to implement MD_FALLBACK_FRAME_STATE_FOR so that we can ++ unwind through signal frames. */ ++static _Unwind_Reason_Code ++nds32_fallback_frame_state (struct _Unwind_Context *context, ++ _Unwind_FrameState *fs) ++{ ++ u_int32_t *pc = (u_int32_t *) context->ra; ++ struct sigcontext *sc_; ++ _Unwind_Ptr new_cfa; ++ ++#ifdef __NDS32_EB__ ++#error "Signal handler is not supported for force unwind." ++#endif ++ ++ if ((_Unwind_Ptr) pc & 3) ++ return _URC_END_OF_STACK; ++ ++ /* Check if we are going through a signal handler. ++ See arch/nds32/kernel/signal.c implementation. ++ FIXME: Currently we only handle little endian (EL) case. */ ++ if (pc[0] == RT_SIGRETURN) ++ { ++ /* Using '_sigfame' memory address to locate kernal's sigcontext. ++ The sigcontext structures in arch/nds32/include/asm/sigcontext.h. */ ++ struct _rt_sigframe *rt_; ++ rt_ = context->cfa; ++ sc_ = &rt_->uc.uc_mcontext; ++ } ++ else ++ return _URC_END_OF_STACK; ++ ++ /* Update cfa from sigcontext. */ ++ new_cfa = (_Unwind_Ptr) sc_; ++ fs->regs.cfa_how = CFA_REG_OFFSET; ++ fs->regs.cfa_reg = STACK_POINTER_REGNUM; ++ fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; ++ ++#define NDS32_PUT_FS_REG(NUM, NAME) \ ++ (fs->regs.reg[NUM].how = REG_SAVED_OFFSET, \ ++ fs->regs.reg[NUM].loc.offset = (_Unwind_Ptr) &(sc_->NAME) - new_cfa) ++ ++ /* Restore all registers value. */ ++ NDS32_PUT_FS_REG (0, nds32_r0); ++ NDS32_PUT_FS_REG (1, nds32_r1); ++ NDS32_PUT_FS_REG (2, nds32_r2); ++ NDS32_PUT_FS_REG (3, nds32_r3); ++ NDS32_PUT_FS_REG (4, nds32_r4); ++ NDS32_PUT_FS_REG (5, nds32_r5); ++ NDS32_PUT_FS_REG (6, nds32_r6); ++ NDS32_PUT_FS_REG (7, nds32_r7); ++ NDS32_PUT_FS_REG (8, nds32_r8); ++ NDS32_PUT_FS_REG (9, nds32_r9); ++ NDS32_PUT_FS_REG (10, nds32_r10); ++ NDS32_PUT_FS_REG (11, nds32_r11); ++ NDS32_PUT_FS_REG (12, nds32_r12); ++ NDS32_PUT_FS_REG (13, nds32_r13); ++ NDS32_PUT_FS_REG (14, nds32_r14); ++ NDS32_PUT_FS_REG (15, nds32_r15); ++ NDS32_PUT_FS_REG (16, nds32_r16); ++ NDS32_PUT_FS_REG (17, nds32_r17); ++ NDS32_PUT_FS_REG (18, nds32_r18); ++ NDS32_PUT_FS_REG (19, nds32_r19); ++ NDS32_PUT_FS_REG (20, nds32_r20); ++ NDS32_PUT_FS_REG (21, nds32_r21); ++ NDS32_PUT_FS_REG (22, nds32_r22); ++ NDS32_PUT_FS_REG (23, nds32_r23); ++ NDS32_PUT_FS_REG (24, nds32_r24); ++ NDS32_PUT_FS_REG (25, nds32_r25); ++ ++ NDS32_PUT_FS_REG (28, nds32_fp); ++ NDS32_PUT_FS_REG (29, nds32_gp); ++ NDS32_PUT_FS_REG (30, nds32_lp); ++ NDS32_PUT_FS_REG (31, nds32_sp); ++ ++ /* Restore PC, point to trigger signal instruction. */ ++ NDS32_PUT_FS_REG (32, nds32_ipc); ++ ++#undef NDS32_PUT_FS_REG ++ ++ /* The retaddr is PC, use PC to find FDE. */ ++ fs->retaddr_column = 32; ++ fs->signal_frame = 1; ++ ++ return _URC_NO_REASON; ++} ++ ++#endif +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/t-nds32-glibc gcc-8.2.0/libgcc/config/nds32/t-nds32-glibc +--- gcc-8.2.0.orig/libgcc/config/nds32/t-nds32-glibc 1970-01-01 01:00:00.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/t-nds32-glibc 2019-01-25 15:38:37.357255536 +0100 +@@ -0,0 +1,34 @@ ++# Rules of glibc library makefile of Andes NDS32 cpu for GNU compiler ++# Copyright (C) 2012-2015 Free Software Foundation, Inc. ++# Contributed by Andes Technology Corporation. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published ++# by the Free Software Foundation; either version 3, or (at your ++# option) any later version. ++# ++# GCC is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++# License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# <http://www.gnu.org/licenses/>. ++ ++# Compiler flags to use when compiling 'libgcc2.c' ++HOST_LIBGCC2_CFLAGS = -O2 -fPIC -fwrapv ++LIB2ADD += $(srcdir)/config/nds32/linux-atomic.c ++ ++#LIB1ASMSRC = nds32/lib1asmsrc-newlib.S ++#LIB1ASMFUNCS = _divsi3 _modsi3 _udivsi3 _umodsi3 ++ ++# List of functions not to build from libgcc2.c. ++#LIB2FUNCS_EXCLUDE = _clzsi2 ++ ++# List of extra C and assembler files(*.S) to add to static libgcc2. ++#LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-newlib/_clzsi2.c ++ ++# ------------------------------------------------------------------------ +diff -urN gcc-8.2.0.orig/libgcc/config/nds32/t-nds32-isr gcc-8.2.0/libgcc/config/nds32/t-nds32-isr +--- gcc-8.2.0.orig/libgcc/config/nds32/t-nds32-isr 2018-01-03 11:03:58.000000000 +0100 ++++ gcc-8.2.0/libgcc/config/nds32/t-nds32-isr 2019-01-25 15:38:37.357255536 +0100 +@@ -23,11 +23,11 @@ + # Makfile fragment rules for libnds32_isr.a to support ISR attribute extension + ############################################################################### + +-# basic flags setting ++# Basic flags setting. + ISR_CFLAGS = $(CFLAGS) -c + +-# the object files we would like to create +-LIBNDS32_ISR_16B_OBJS = \ ++# The object files we would like to create. ++LIBNDS32_ISR_VEC_OBJS = \ + vec_vid00.o vec_vid01.o vec_vid02.o vec_vid03.o \ + vec_vid04.o vec_vid05.o vec_vid06.o vec_vid07.o \ + vec_vid08.o vec_vid09.o vec_vid10.o vec_vid11.o \ +@@ -46,40 +46,9 @@ + vec_vid60.o vec_vid61.o vec_vid62.o vec_vid63.o \ + vec_vid64.o vec_vid65.o vec_vid66.o vec_vid67.o \ + vec_vid68.o vec_vid69.o vec_vid70.o vec_vid71.o \ +- vec_vid72.o \ +- excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ +- excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ +- intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ +- intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ +- reset.o +- +-LIBNDS32_ISR_4B_OBJS = \ +- vec_vid00_4b.o vec_vid01_4b.o vec_vid02_4b.o vec_vid03_4b.o \ +- vec_vid04_4b.o vec_vid05_4b.o vec_vid06_4b.o vec_vid07_4b.o \ +- vec_vid08_4b.o vec_vid09_4b.o vec_vid10_4b.o vec_vid11_4b.o \ +- vec_vid12_4b.o vec_vid13_4b.o vec_vid14_4b.o vec_vid15_4b.o \ +- vec_vid16_4b.o vec_vid17_4b.o vec_vid18_4b.o vec_vid19_4b.o \ +- vec_vid20_4b.o vec_vid21_4b.o vec_vid22_4b.o vec_vid23_4b.o \ +- vec_vid24_4b.o vec_vid25_4b.o vec_vid26_4b.o vec_vid27_4b.o \ +- vec_vid28_4b.o vec_vid29_4b.o vec_vid30_4b.o vec_vid31_4b.o \ +- vec_vid32_4b.o vec_vid33_4b.o vec_vid34_4b.o vec_vid35_4b.o \ +- vec_vid36_4b.o vec_vid37_4b.o vec_vid38_4b.o vec_vid39_4b.o \ +- vec_vid40_4b.o vec_vid41_4b.o vec_vid42_4b.o vec_vid43_4b.o \ +- vec_vid44_4b.o vec_vid45_4b.o vec_vid46_4b.o vec_vid47_4b.o \ +- vec_vid48_4b.o vec_vid49_4b.o vec_vid50_4b.o vec_vid51_4b.o \ +- vec_vid52_4b.o vec_vid53_4b.o vec_vid54_4b.o vec_vid55_4b.o \ +- vec_vid56_4b.o vec_vid57_4b.o vec_vid58_4b.o vec_vid59_4b.o \ +- vec_vid60_4b.o vec_vid61_4b.o vec_vid62_4b.o vec_vid63_4b.o \ +- vec_vid64_4b.o vec_vid65_4b.o vec_vid66_4b.o vec_vid67_4b.o \ +- vec_vid68_4b.o vec_vid69_4b.o vec_vid70_4b.o vec_vid71_4b.o \ +- vec_vid72_4b.o \ +- excp_isr_ps_nn_4b.o excp_isr_ps_ns_4b.o excp_isr_ps_nr_4b.o \ +- excp_isr_sa_nn_4b.o excp_isr_sa_ns_4b.o excp_isr_sa_nr_4b.o \ +- intr_isr_ps_nn_4b.o intr_isr_ps_ns_4b.o intr_isr_ps_nr_4b.o \ +- intr_isr_sa_nn_4b.o intr_isr_sa_ns_4b.o intr_isr_sa_nr_4b.o \ +- reset_4b.o ++ vec_vid72.o + +-LIBNDS32_ISR_COMMON_OBJS = \ ++LIBNDS32_ISR_JMP_OBJS = \ + jmptbl_vid00.o jmptbl_vid01.o jmptbl_vid02.o jmptbl_vid03.o \ + jmptbl_vid04.o jmptbl_vid05.o jmptbl_vid06.o jmptbl_vid07.o \ + jmptbl_vid08.o jmptbl_vid09.o jmptbl_vid10.o jmptbl_vid11.o \ +@@ -98,29 +67,32 @@ + jmptbl_vid60.o jmptbl_vid61.o jmptbl_vid62.o jmptbl_vid63.o \ + jmptbl_vid64.o jmptbl_vid65.o jmptbl_vid66.o jmptbl_vid67.o \ + jmptbl_vid68.o jmptbl_vid69.o jmptbl_vid70.o jmptbl_vid71.o \ +- jmptbl_vid72.o \ ++ jmptbl_vid72.o ++ ++LIBNDS32_ISR_COMMON_OBJS = \ ++ excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ ++ excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ ++ intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ ++ intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ ++ reset.o \ + nmih.o \ + wrh.o + +-LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_16B_OBJS) $(LIBNDS32_ISR_4B_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) +- ++LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_VEC_OBJS) $(LIBNDS32_ISR_JMP_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) + +-# Build common objects for ISR library +-nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o + +-wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o + +-jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S ++# Build vector vid objects for ISR library. ++vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ + + +- +-# Build 16b version objects for ISR library. (no "_4b" postfix string) +-vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S ++# Build jump table objects for ISR library. ++jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ + ++ ++# Build commen objects for ISR library. + excp_isr_ps_nn.o: $(srcdir)/config/nds32/isr-library/excp_isr.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr.S -o excp_isr_ps_nn.o + +@@ -160,48 +132,12 @@ + reset.o: $(srcdir)/config/nds32/isr-library/reset.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset.S -o reset.o + +-# Build 4b version objects for ISR library. +-vec_vid%_4b.o: $(srcdir)/config/nds32/isr-library/vec_vid%_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ +- +-excp_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nn_4b.o +- +-excp_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_ns_4b.o +- +-excp_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nr_4b.o +- +-excp_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nn_4b.o +- +-excp_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_ns_4b.o +- +-excp_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nr_4b.o +- +-intr_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nn_4b.o +- +-intr_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_ns_4b.o +- +-intr_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nr_4b.o +- +-intr_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nn_4b.o +- +-intr_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_ns_4b.o ++nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S ++ $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o + +-intr_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nr_4b.o ++wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S ++ $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o + +-reset_4b.o: $(srcdir)/config/nds32/isr-library/reset_4b.S +- $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset_4b.S -o reset_4b.o + + + # The rule to create libnds32_isr.a file +diff -urN gcc-8.2.0.orig/libgcc/config.host gcc-8.2.0/libgcc/config.host +--- gcc-8.2.0.orig/libgcc/config.host 2018-04-06 22:04:17.000000000 +0200 ++++ gcc-8.2.0/libgcc/config.host 2019-01-25 15:38:32.841242694 +0100 +@@ -974,6 +974,23 @@ + tmake_file="$tm_file t-crtstuff t-fdpbit msp430/t-msp430" + extra_parts="$extra_parts libmul_none.a libmul_16.a libmul_32.a libmul_f5.a" + ;; ++nds32*-linux*) ++ # Basic makefile fragment and extra_parts for crt stuff. ++ # We also append c-isr library implementation. ++ tmake_file="${tmake_file} t-slibgcc-libgcc" ++ tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp" ++ # The header file of defining MD_FALLBACK_FRAME_STATE_FOR. ++ md_unwind_header=nds32/linux-unwind.h ++ # Append library definition makefile fragment according to --with-nds32-lib=X setting. ++ case "${with_nds32_lib}" in ++ "" | glibc | uclibc ) ++ ;; ++ *) ++ echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: glibc uclibc" 1>&2 ++ exit 1 ++ ;; ++ esac ++ ;; + nds32*-elf*) + # Basic makefile fragment and extra_parts for crt stuff. + # We also append c-isr library implementation. |