diff --git a/gcc/common/config/nds32/nds32-common.c b/gcc/common/config/nds32/nds32-common.c index dbcc390..4823660 100644 --- a/gcc/common/config/nds32/nds32-common.c +++ b/gcc/common/config/nds32/nds32-common.c @@ -53,6 +53,16 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED, return true; + case OPT_misr_secure_: + /* Check the valid security level: 0 1 2 3. */ + if (value < 0 || value > 3) + { + error_at (loc, "for the option -misr-secure=X, the valid X " + "must be: 0, 1, 2, or 3"); + return false; + } + return true; + case OPT_mcache_block_size_: /* Check valid value: 4 8 16 32 64 128 256 512. */ if (exact_log2 (value) < 2 || exact_log2 (value) > 9) @@ -74,12 +84,19 @@ nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED, /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ static const struct default_options nds32_option_optimization_table[] = { +#if TARGET_LINUX_ABI == 0 + /* Disable -fdelete-null-pointer-checks by default in ELF toolchain. */ + { OPT_LEVELS_ALL, OPT_fdelete_null_pointer_checks, + NULL, 0 }, +#endif /* Enable -fsched-pressure by default at -O1 and above. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, /* Enable -fomit-frame-pointer by default at all optimization levels. */ { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 1 }, /* Enable -mrelax-hint by default at all optimization levels. */ { OPT_LEVELS_ALL, OPT_mrelax_hint, NULL, 1 }, + /* Enable -malways-align by default at -O1 and above, but not -Os or -Og. */ + { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_malways_align, NULL, 1 }, /* Enable -mv3push by default at -Os, but it is useless under V2 ISA. */ { OPT_LEVELS_SIZE, OPT_mv3push, NULL, 1 }, @@ -87,6 +104,19 @@ static const struct default_options nds32_option_optimization_table[] = }; /* ------------------------------------------------------------------------ */ + +/* Implement TARGET_EXCEPT_UNWIND_INFO. 
*/ +static enum unwind_info_type +nds32_except_unwind_info (struct gcc_options *opts ATTRIBUTE_UNUSED) +{ + if (TARGET_LINUX_ABI) + return UI_DWARF2; + + return UI_SJLJ; +} + +/* ------------------------------------------------------------------------ */ + /* Run-time Target Specification. */ @@ -103,6 +133,7 @@ static const struct default_options nds32_option_optimization_table[] = TARGET_EXT_PERF : Generate performance extention instrcution. TARGET_EXT_PERF2 : Generate performance extention version 2 instrcution. TARGET_EXT_STRING : Generate string extention instrcution. + TARGET_HW_ABS : Generate hardware abs instruction. TARGET_CMOV : Generate conditional move instruction. */ #undef TARGET_DEFAULT_TARGET_FLAGS #define TARGET_DEFAULT_TARGET_FLAGS \ @@ -113,6 +144,7 @@ static const struct default_options nds32_option_optimization_table[] = | MASK_EXT_PERF \ | MASK_EXT_PERF2 \ | MASK_EXT_STRING \ + | MASK_HW_ABS \ | MASK_CMOV) #undef TARGET_HANDLE_OPTION @@ -125,7 +157,7 @@ static const struct default_options nds32_option_optimization_table[] = /* Defining the Output Assembler Language. 
*/ #undef TARGET_EXCEPT_UNWIND_INFO -#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info +#define TARGET_EXCEPT_UNWIND_INFO nds32_except_unwind_info /* ------------------------------------------------------------------------ */ diff --git a/gcc/config.gcc b/gcc/config.gcc index a5defb0..e5b0350 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -445,7 +445,17 @@ mips*-*-*) ;; nds32*) cpu_type=nds32 - extra_headers="nds32_intrinsic.h" + extra_headers="nds32_intrinsic.h nds32_isr.h nds32_init.inc" + case ${target} in + nds32*-*-linux*) + extra_options="${extra_options} nds32/nds32-linux.opt" + ;; + nds32*-*-elf*) + extra_options="${extra_options} nds32/nds32-elf.opt" + ;; + *) + ;; + esac extra_objs="nds32-cost.o nds32-intrinsic.o nds32-isr.o nds32-md-auxiliary.o nds32-pipelines-auxiliary.o nds32-predicates.o nds32-memory-manipulation.o nds32-fp-as-gp.o nds32-relax-opt.o nds32-utils.o" ;; nios2-*-*) @@ -2333,17 +2343,36 @@ msp430*-*-*) tmake_file="${tmake_file} msp430/t-msp430" extra_gcc_objs="driver-msp430.o" ;; -nds32le-*-*) +nds32*-*-*) target_cpu_default="0" tm_defines="${tm_defines}" - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" - tmake_file="nds32/t-nds32 nds32/t-mlibs" - ;; -nds32be-*-*) - target_cpu_default="0|MASK_BIG_ENDIAN" - tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" - tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/nds32_intrinsic.h" - tmake_file="nds32/t-nds32 nds32/t-mlibs" + case ${target} in + nds32le*-*-*) + ;; + nds32be-*-*) + target_cpu_default="${target_cpu_default}|MASK_BIG_ENDIAN" + tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" + ;; + esac + case ${target} in + nds32*-*-elf*) + tm_file="dbxelf.h elfos.h newlib-stdint.h ${tm_file} nds32/elf.h nds32/nds32_intrinsic.h" + tmake_file="nds32/t-nds32 nds32/t-elf" + ;; + nds32*-*-linux*) + tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h nds32/linux.h nds32/nds32_intrinsic.h" + tmake_file="${tmake_file} 
nds32/t-nds32 nds32/t-linux" + ;; + esac + + # Handle --enable-default-relax setting. + if test x${enable_default_relax} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_RELAX=1" + fi + # Handle --with-ext-dsp + if test x${with_ext_dsp} = xyes; then + tm_defines="${tm_defines} TARGET_DEFAULT_EXT_DSP=1" + fi ;; nios2-*-*) tm_file="elfos.h ${tm_file}" @@ -4316,11 +4345,11 @@ case "${target}" in "") with_cpu=n9 ;; - n6 | n7 | n8 | e8 | s8 | n9) + n6 | n7 |n8 | e8 | s8 | n9 | n10 | d10 | n12 | n13 | n15) # OK ;; *) - echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2 + echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10 n12 n13 n15" 1>&2 exit 1 ;; esac @@ -4330,15 +4359,30 @@ case "${target}" in "") # the default library is newlib with_nds32_lib=newlib + tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" ;; newlib) # OK + tm_defines="${tm_defines} TARGET_DEFAULT_CTOR_DTOR=1" ;; mculib) # OK + # for the arch=v3f or arch=v3s under mculib toolchain, + # we would like to set -fno-math-errno as default + case "${with_arch}" in + v3f | v3s) + tm_defines="${tm_defines} TARGET_DEFAULT_NO_MATH_ERRNO=1" + ;; + esac + ;; + glibc) + # OK + tm_defines="${tm_defines}" + ;; + uclibc) ;; *) - echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib" 1>&2 + echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: newlib mculib glibc uclibc" 1>&2 exit 1 ;; esac diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md index 37c2704..6d42f50 100644 --- a/gcc/config/nds32/constants.md +++ b/gcc/config/nds32/constants.md @@ -23,6 +23,7 @@ (define_constants [(R8_REGNUM 8) (TA_REGNUM 15) + (TP_REGNUM 25) (FP_REGNUM 28) (GP_REGNUM 29) (LP_REGNUM 30) @@ -49,6 +50,16 @@ UNSPEC_FFB UNSPEC_FFMISM UNSPEC_FLMISM + UNSPEC_KDMBB + UNSPEC_KDMBT + UNSPEC_KDMTB + UNSPEC_KDMTT + UNSPEC_KHMBB + UNSPEC_KHMBT + UNSPEC_KHMTB + UNSPEC_KHMTT + UNSPEC_KSLRAW + 
UNSPEC_KSLRAWU UNSPEC_SVA UNSPEC_SVS UNSPEC_WSBH @@ -62,6 +73,29 @@ UNSPEC_UASTORE_HW UNSPEC_UASTORE_W UNSPEC_UASTORE_DW + UNSPEC_GOTINIT + UNSPEC_GOT + UNSPEC_GOTOFF + UNSPEC_PLT + UNSPEC_TLSGD + UNSPEC_TLSLD + UNSPEC_TLSIE + UNSPEC_TLSLE + UNSPEC_ROUND + UNSPEC_VEC_COMPARE + UNSPEC_KHM + UNSPEC_KHMX + UNSPEC_CLIP_OV + UNSPEC_CLIPS_OV + UNSPEC_BITREV + UNSPEC_KABS + UNSPEC_LOOP_END + UNSPEC_TLS_DESC + UNSPEC_TLS_IE + UNSPEC_ADD32 + UNSPEC_ICT + UNSPEC_KADDH + UNSPEC_KSUBH ]) ;; The unspec_volatile operation index. @@ -135,10 +169,14 @@ UNSPEC_VOLATILE_SET_TRIG_EDGE UNSPEC_VOLATILE_GET_TRIG_TYPE UNSPEC_VOLATILE_RELAX_GROUP + UNSPEC_VOLATILE_OMIT_FP_BEGIN + UNSPEC_VOLATILE_OMIT_FP_END UNSPEC_VOLATILE_POP25_RETURN UNSPEC_VOLATILE_UNALIGNED_FEATURE UNSPEC_VOLATILE_ENABLE_UNALIGNED UNSPEC_VOLATILE_DISABLE_UNALIGNED + UNSPEC_VOLATILE_RDOV + UNSPEC_VOLATILE_CLROV ]) ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md index 7af7769..315c603 100644 --- a/gcc/config/nds32/constraints.md +++ b/gcc/config/nds32/constraints.md @@ -127,6 +127,11 @@ (and (match_code "const_int") (match_test "IN_RANGE (ival, -31, 0)"))) +(define_constraint "Iu06" + "Unsigned immediate 6-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 6) && ival >= 0"))) + ;; Ip05 is special and dedicated for v3 movpi45 instruction. ;; movpi45 has imm5u field but the range is 16 ~ 47. 
(define_constraint "Ip05" @@ -136,10 +141,10 @@ && ival >= (0 + 16) && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) -(define_constraint "Iu06" +(define_constraint "IU06" "Unsigned immediate 6-bit value constraint for addri36.sp instruction" (and (match_code "const_int") - (match_test "ival < (1 << 6) + (match_test "ival < (1 << 8) && ival >= 0 && (ival % 4 == 0) && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) @@ -302,6 +307,25 @@ (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) && (IN_RANGE (exact_log2 (ival + 1), 1, 8))"))) +(define_constraint "CVp5" + "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47" + (and (match_code "const_vector") + (match_test "nds32_valid_CVp5_p (op)"))) + +(define_constraint "CVs5" + "Signed immediate 5-bit value" + (and (match_code "const_vector") + (match_test "nds32_valid_CVs5_p (op)"))) + +(define_constraint "CVs2" + "Signed immediate 20-bit value" + (and (match_code "const_vector") + (match_test "nds32_valid_CVs2_p (op)"))) + +(define_constraint "CVhi" + "The immediate value that can be simply set high 20-bit" + (and (match_code "const_vector") + (match_test "nds32_valid_CVhi_p (op)"))) (define_memory_constraint "U33" "Memory constraint for 333 format" @@ -349,4 +373,9 @@ (match_test "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) && nds32_float_mem_operand_p (op)"))) +(define_constraint "S" + "@internal + A constant call address." + (match_operand 0 "nds32_symbolic_operand")) + ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/elf.h b/gcc/config/nds32/elf.h new file mode 100644 index 0000000..66397ac --- /dev/null +++ b/gcc/config/nds32/elf.h @@ -0,0 +1,81 @@ +/* Definitions of target machine of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +/* ------------------------------------------------------------------------ */ + +#define TARGET_LINUX_ABI 0 + +/* In the configure stage we may use options --enable-default-relax, + --enable-Os-default-ifc and --enable-Os-default-ex9. They affect + the default spec of passing --relax, --mifc, and --mex9 to linker. + We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC + so that we can customize them conveniently. */ +#define LINK_SPEC \ + " %{G*}" \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + " %{shared:-shared}" \ + NDS32_RELAX_SPEC + +#define LIB_SPEC \ + " -lc -lgloss" + +#define LIBGCC_SPEC \ + " -lgcc" + +/* The option -mno-ctor-dtor can disable constructor/destructor feature + by applying different crt stuff. In the convention, crt0.o is the + startup file without constructor/destructor; + crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the + startup files with constructor/destructor. + Note that crt0.o, crt1.o, crti.o, and crtn.o are provided + by newlib/mculib/glibc/uclibc, while crtbegin.o and crtend.o are + currently provided by GCC for nds32 target. + + For nds32 target so far: + If -mno-ctor-dtor, we are going to link + "crt0.o [user objects]". + If -mctor-dtor, we are going to link + "crt1.o crtbegin1.o [user objects] crtend1.o". + + Note that the TARGET_DEFAULT_CTOR_DTOR would affect the + default behavior. Check gcc/config.gcc for more information. 
*/ +#ifdef TARGET_DEFAULT_CTOR_DTOR + #define STARTFILE_SPEC \ + " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ + " %{!mno-ctor-dtor:crtbegin1.o%s}" \ + " %{mcrt-arg:crtarg.o%s}" + #define ENDFILE_SPEC \ + " %{!mno-ctor-dtor:crtend1.o%s}" +#else + #define STARTFILE_SPEC \ + " %{mctor-dtor|coverage:crt1.o%s;:crt0.o%s}" \ + " %{mctor-dtor|coverage:crtbegin1.o%s}" \ + " %{mcrt-arg:crtarg.o%s}" + #define ENDFILE_SPEC \ + " %{mctor-dtor|coverage:crtend1.o%s}" +#endif + +#define STARTFILE_CXX_SPEC \ + " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ + " %{!mno-ctor-dtor:crtbegin1.o%s}" \ + " %{mcrt-arg:crtarg.o%s}" +#define ENDFILE_CXX_SPEC \ + " %{!mno-ctor-dtor:crtend1.o%s}" diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md index c2062de..f4fb581 100644 --- a/gcc/config/nds32/iterators.md +++ b/gcc/config/nds32/iterators.md @@ -68,6 +68,28 @@ ;; shifts (define_code_iterator shift_rotate [ashift ashiftrt lshiftrt rotatert]) +(define_code_iterator shifts [ashift ashiftrt lshiftrt]) + +(define_code_iterator shiftrt [ashiftrt lshiftrt]) + +(define_code_iterator sat_plus [ss_plus us_plus]) + +(define_code_iterator all_plus [plus ss_plus us_plus]) + +(define_code_iterator sat_minus [ss_minus us_minus]) + +(define_code_iterator all_minus [minus ss_minus us_minus]) + +(define_code_iterator plus_minus [plus minus]) + +(define_code_iterator extend [sign_extend zero_extend]) + +(define_code_iterator sumax [smax umax]) + +(define_code_iterator sumin [smin umin]) + +(define_code_iterator sumin_max [smax umax smin umin]) + ;;---------------------------------------------------------------------------- ;; Code attributes. 
;;---------------------------------------------------------------------------- @@ -76,5 +98,23 @@ (define_code_attr shift [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr") (rotatert "rotr")]) +(define_code_attr su + [(ashiftrt "") (lshiftrt "u") (sign_extend "s") (zero_extend "u")]) + +(define_code_attr zs + [(sign_extend "s") (zero_extend "z")]) + +(define_code_attr uk + [(plus "") (ss_plus "k") (us_plus "uk") + (minus "") (ss_minus "k") (us_minus "uk")]) + +(define_code_attr opcode + [(plus "add") (minus "sub") (smax "smax") (umax "umax") (smin "smin") (umin "umin")]) + +(define_code_attr add_rsub + [(plus "a") (minus "rs")]) + +(define_code_attr add_sub + [(plus "a") (minus "s")]) ;;---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/linux.h b/gcc/config/nds32/linux.h new file mode 100644 index 0000000..f7104a9 --- /dev/null +++ b/gcc/config/nds32/linux.h @@ -0,0 +1,69 @@ +/* Definitions of target machine of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + + +/* ------------------------------------------------------------------------ */ + +#define TARGET_LINUX_ABI 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +/* In the configure stage we may use options --enable-default-relax, + --enable-Os-default-ifc and --enable-Os-default-ex9. They effect + the default spec of passing --relax, --mifc, and --mex9 to linker. + We use NDS32_RELAX_SPEC, NDS32_IFC_SPEC, and NDS32_EX9_SPEC + so that we can customize them conveniently. */ +#define LINK_SPEC \ + " %{G*}" \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + " %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ + %{static:-static}}" \ + NDS32_RELAX_SPEC + +#define LINK_PIE_SPEC "%{pie:%{!fno-pie:%{!fno-PIE:%{!static:-pie}}}} " + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +/* The SYNC operations are implemented as library functions, not + INSN patterns. As a result, the HAVE defines for the patterns are + not defined. We need to define them to generate the corresponding + __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE + defines. + Ref: https://sourceware.org/ml/libc-alpha/2014-09/msg00322.html */ +#define HAVE_sync_compare_and_swapqi 1 +#define HAVE_sync_compare_and_swaphi 1 +#define HAVE_sync_compare_and_swapsi 1 diff --git a/gcc/config/nds32/nds32-cost.c b/gcc/config/nds32/nds32-cost.c index 8d01e8a..979000f 100644 --- a/gcc/config/nds32/nds32-cost.c +++ b/gcc/config/nds32/nds32-cost.c @@ -34,66 +34,379 @@ #include "optabs.h" /* For GEN_FCN. 
*/ #include "recog.h" #include "tm-constrs.h" +#include "tree-pass.h" /* ------------------------------------------------------------------------ */ -bool -nds32_rtx_costs_impl (rtx x, - machine_mode mode ATTRIBUTE_UNUSED, - int outer_code, - int opno ATTRIBUTE_UNUSED, - int *total, - bool speed) -{ - int code = GET_CODE (x); +typedef bool (*rtx_cost_func) (rtx, int, int, int, int*); - /* According to 'speed', goto suitable cost model section. */ - if (speed) - goto performance_cost; - else - goto size_cost; +struct rtx_cost_model_t { + rtx_cost_func speed_prefer; + rtx_cost_func size_prefer; +}; +static rtx_cost_model_t rtx_cost_model; -performance_cost: - /* This is section for performance cost model. */ +static int insn_size_16bit; /* Initial at nds32_init_rtx_costs. */ +static const int insn_size_32bit = 4; + +static bool +nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED, + int code, + int outer_code ATTRIBUTE_UNUSED, + int opno ATTRIBUTE_UNUSED, + int *total) +{ + rtx op0; + rtx op1; + machine_mode mode = GET_MODE (x); + /* Scale cost by mode size. */ + int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); - /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. - We treat it as 4-cycle cost for each instruction - under performance consideration. */ switch (code) { - case SET: - /* For 'SET' rtx, we need to return false - so that it can recursively calculate costs. */ - return false; - case USE: /* Used in combine.c as a marker. */ *total = 0; - break; + return true; + + case CONST_INT: + /* When not optimizing for size, we care more about the cost + of hot code, and hot code is often in a loop. If a constant + operand needs to be forced into a register, we will often be + able to hoist the constant load out of the loop, so the load + should not contribute to the cost. */ + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_Is20 (x) ? 
0 : 4; + else if (outer_code == AND || outer_code == IOR || outer_code == XOR + || outer_code == MINUS) + *total = satisfies_constraint_Iu15 (x) ? 0 : 4; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = satisfies_constraint_Iu05 (x) ? 0 : 4; + else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE + || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE) + *total = satisfies_constraint_Is16 (x) ? 0 : 4; + else + *total = COSTS_N_INSNS (1); + return true; + + case CONST: + case LO_SUM: + case HIGH: + case SYMBOL_REF: + *total = COSTS_N_INSNS (1); + return true; + + case MEM: + *total = COSTS_N_INSNS (1); + return true; + + case SET: + op0 = SET_DEST (x); + op1 = SET_SRC (x); + mode = GET_MODE (op0); + /* Scale cost by mode size. */ + cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); + + switch (GET_CODE (op1)) + { + case REG: + case SUBREG: + /* Register move and Store instructions. */ + if ((REG_P (op0) || MEM_P (op0)) + && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) + *total = COSTS_N_INSNS (1); + else + *total = cost; + return true; + + case MEM: + /* Load instructions. */ + if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode)) + *total = COSTS_N_INSNS (1); + else + *total = cost; + return true; + + case CONST_INT: + /* movi instruction. */ + if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) + { + if (satisfies_constraint_Is20 (op1)) + *total = COSTS_N_INSNS (1) - 1; + else + *total = COSTS_N_INSNS (2); + } + else + *total = cost; + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + /* la instruction. 
*/ + if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode)) + *total = COSTS_N_INSNS (1) - 1; + else + *total = cost; + return true; + case VEC_SELECT: + *total = cost; + return true; + + default: + *total = cost; + return true; + } + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT + || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + + else if ((GET_CODE (op1) == CONST_INT + && satisfies_constraint_Is15 (op1)) + || REG_P (op1)) + /* ADD instructions */ + *total = COSTS_N_INSNS (1); + else + /* ADD instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case MINUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT + || GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT) + /* ALU_SHIFT */ + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (op0) == CONST_INT + && satisfies_constraint_Is15 (op0)) + || REG_P (op0)) + /* SUB instructions */ + *total = COSTS_N_INSNS (1); + else + /* SUB instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case TRUNCATE: + /* TRUNCATE and AND behavior is same. */ + *total = COSTS_N_INSNS (1); + return true; + + case AND: + case IOR: + case XOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (NDS32_EXT_DSP_P ()) + { + /* We prefer (and (ior) (ior)) than (ior (and) (and)) for + synthetize pk** and insb instruction. 
*/ + if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR) + { *total = COSTS_N_INSNS (1); return true; } /* Cost goes in *total; the bool result only says the cost is final. */ + + if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND) + { *total = COSTS_N_INSNS (10); return true; } /* Penalize this form so the (and (ior) (ior)) shape wins. */ + } + + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT) + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (op1) == CONST_INT + && satisfies_constraint_Iu15 (op1)) + || REG_P (op1)) + /* AND, OR, XOR instructions */ + *total = COSTS_N_INSNS (1); + else if (code == AND || GET_CODE (op0) == NOT) + /* BITC instruction */ + *total = COSTS_N_INSNS (1); + else + /* AND, OR, XOR instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; case MULT: + if (GET_MODE (x) == DImode + || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + /* MUL instructions */ + *total = COSTS_N_INSNS (1); + else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (outer_code == PLUS || outer_code == MINUS) + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* MUL instructions */ + *total = COSTS_N_INSNS (1); + else + /* MUL instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + + if (TARGET_MUL_SLOW) + *total += COSTS_N_INSNS (4); + + return true; + + case LSHIFTRT: + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (outer_code == PLUS || outer_code == MINUS + || outer_code == AND || outer_code == IOR + || outer_code == XOR) + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* SRL instructions */ + *total = COSTS_N_INSNS (1); + else + /* SRL instructions: IMM out of range. 
*/ + *total = COSTS_N_INSNS (2); + return true; + + case ASHIFT: + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if (outer_code == AND || outer_code == IOR + || outer_code == XOR) + *total = COSTS_N_INSNS (2); + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* SLL instructions */ + *total = COSTS_N_INSNS (1); + else + /* SLL instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case ASHIFTRT: + case ROTATERT: + if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode)) + *total = cost; + else if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu05 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* ROTR, SLL instructions */ + *total = COSTS_N_INSNS (1); + else + /* ROTR, SLL instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + return true; + + case LT: + case LTU: + if (outer_code == SET) + { + if ((GET_CODE (XEXP (x, 1)) == CONST_INT + && satisfies_constraint_Iu15 (XEXP (x, 1))) + || REG_P (XEXP (x, 1))) + /* SLT, SLTI instructions */ + *total = COSTS_N_INSNS (1); + else + /* SLT, SLT instructions: IMM out of range. */ + *total = COSTS_N_INSNS (2); + } + else + /* branch */ + *total = COSTS_N_INSNS (2); + return true; + + case EQ: + case NE: + case GE: + case LE: + case GT: + /* branch */ + *total = COSTS_N_INSNS (2); + return true; + + case IF_THEN_ELSE: + if (GET_CODE (XEXP (x, 1)) == LABEL_REF) + /* branch */ + *total = COSTS_N_INSNS (2); + else + /* cmovz, cmovn instructions */ + *total = COSTS_N_INSNS (1); + return true; + + case LABEL_REF: + if (outer_code == IF_THEN_ELSE) + /* branch */ + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (1); + return true; + + case ZERO_EXTEND: + case SIGN_EXTEND: + if (MEM_P (XEXP (x, 0))) + /* Using memory access. */ + *total = COSTS_N_INSNS (1); + else + /* Zero extend and sign extend instructions. 
*/ + *total = COSTS_N_INSNS (1); + return true; + + case NEG: + case NOT: *total = COSTS_N_INSNS (1); - break; + return true; case DIV: case UDIV: case MOD: case UMOD: - *total = COSTS_N_INSNS (7); - break; + *total = COSTS_N_INSNS (20); + return true; - default: + case CALL: + *total = COSTS_N_INSNS (2); + return true; + + case CLZ: + case SMIN: + case SMAX: + case ZERO_EXTRACT: + if (TARGET_EXT_PERF) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (3); + return true; + case VEC_SELECT: *total = COSTS_N_INSNS (1); - break; - } - - return true; + return true; + default: + *total = COSTS_N_INSNS (3); + return true; + } +} -size_cost: - /* This is section for size cost model. */ - +static bool +nds32_rtx_costs_size_prefer (rtx x, + int code, + int outer_code, + int opno ATTRIBUTE_UNUSED, + int *total) +{ /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. We treat it as 4-byte cost for each instruction under code size consideration. */ @@ -118,85 +431,162 @@ size_cost: (set X imm20s), use movi, 4-byte cost. (set X BIG_INT), use sethi/ori, 8-byte cost. */ if (satisfies_constraint_Is05 (x)) - *total = COSTS_N_INSNS (1) - 2; + *total = insn_size_16bit; else if (satisfies_constraint_Is20 (x)) - *total = COSTS_N_INSNS (1); + *total = insn_size_32bit; else - *total = COSTS_N_INSNS (2); + *total = insn_size_32bit * 2; } else if (outer_code == PLUS || outer_code == MINUS) { /* Possible addi333/subi333 or subi45/addi45, 2-byte cost. General case, cost 1 instruction with 4-byte. */ if (satisfies_constraint_Iu05 (x)) - *total = COSTS_N_INSNS (1) - 2; + *total = insn_size_16bit; else - *total = COSTS_N_INSNS (1); + *total = insn_size_32bit; } else if (outer_code == ASHIFT) { /* Possible slli333, 2-byte cost. General case, cost 1 instruction with 4-byte. 
*/ if (satisfies_constraint_Iu03 (x)) - *total = COSTS_N_INSNS (1) - 2; + *total = insn_size_16bit; else - *total = COSTS_N_INSNS (1); + *total = insn_size_32bit; } else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT) { /* Possible srai45 or srli45, 2-byte cost. General case, cost 1 instruction with 4-byte. */ if (satisfies_constraint_Iu05 (x)) - *total = COSTS_N_INSNS (1) - 2; + *total = insn_size_16bit; else - *total = COSTS_N_INSNS (1); + *total = insn_size_32bit; } else { /* For other cases, simply set it 4-byte cost. */ - *total = COSTS_N_INSNS (1); + *total = insn_size_32bit; } break; case CONST_DOUBLE: /* It requires high part and low part processing, set it 8-byte cost. */ - *total = COSTS_N_INSNS (2); + *total = insn_size_32bit * 2; + break; + + case CONST: + case SYMBOL_REF: + *total = insn_size_32bit * 2; break; default: /* For other cases, generally we set it 4-byte cost - and stop resurively traversing. */ - *total = COSTS_N_INSNS (1); + and stop resurively traversing. */ + *total = insn_size_32bit; break; } return true; } -int -nds32_address_cost_impl (rtx address, - machine_mode mode ATTRIBUTE_UNUSED, - addr_space_t as ATTRIBUTE_UNUSED, - bool speed) +void +nds32_init_rtx_costs (void) +{ + rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer; + rtx_cost_model.size_prefer = nds32_rtx_costs_size_prefer; + + if (TARGET_16_BIT) + insn_size_16bit = 2; + else + insn_size_16bit = 4; +} + +/* This target hook describes the relative costs of RTL expressions. + Return 'true' when all subexpressions of x have been processed. + Return 'false' to sum the costs of sub-rtx, plus cost of this operation. + Refer to gcc/rtlanal.c for more information. */ +bool +nds32_rtx_costs_impl (rtx x, + machine_mode mode ATTRIBUTE_UNUSED, + int outer_code, + int opno, + int *total, + bool speed) +{ + int code = GET_CODE (x); + + /* According to 'speed', use suitable cost model section. 
*/ + if (speed) + return rtx_cost_model.speed_prefer(x, code, outer_code, opno, total); + else + return rtx_cost_model.size_prefer(x, code, outer_code, opno, total); +} + + +int nds32_address_cost_speed_prefer (rtx address) { rtx plus0, plus1; enum rtx_code code; code = GET_CODE (address); - /* According to 'speed', goto suitable cost model section. */ - if (speed) - goto performance_cost; - else - goto size_cost; + switch (code) + { + case POST_MODIFY: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains + POST_MODIFY/POST_INC/POST_DEC behavior. */ + return COSTS_N_INSNS (1) - 2; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ + return COSTS_N_INSNS (1) - 2; + + case PLUS: + /* We do not need to check if the address is a legitimate address, + because this hook is never called with an invalid address. + But we better check the range of + const_int value for cost, if it exists. */ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) + return COSTS_N_INSNS (1) - 2; + else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) + return COSTS_N_INSNS (1) - 1; + else if (REG_P (plus0) && REG_P (plus1)) + return COSTS_N_INSNS (1); + + /* For other 'plus' situation, make it cost 4-byte. */ + return COSTS_N_INSNS (1); + + default: + break; + } + + return COSTS_N_INSNS (4); -performance_cost: - /* This is section for performance cost model. */ +} - /* FALLTHRU, currently we use same cost model as size_cost. */ +int nds32_address_cost_speed_fwprop (rtx address) +{ + rtx plus0, plus1; + enum rtx_code code; -size_cost: - /* This is section for size cost model. 
*/ + code = GET_CODE (address); switch (code) { @@ -210,12 +600,12 @@ size_cost: case SYMBOL_REF: /* We can have gp-relative load/store for symbol_ref. Have it 4-byte cost. */ - return COSTS_N_INSNS (1); + return COSTS_N_INSNS (2); case CONST: /* It is supposed to be the pattern (const (plus symbol_ref const_int)). Have it 4-byte cost. */ - return COSTS_N_INSNS (1); + return COSTS_N_INSNS (2); case REG: /* Simply return 4-byte costs. */ @@ -233,11 +623,15 @@ size_cost: { /* If it is possible to be lwi333/swi333 form, make it 2-byte cost. */ - if (satisfies_constraint_Iu05 (plus1)) + if (satisfies_constraint_Iu03 (plus1)) return (COSTS_N_INSNS (1) - 2); else return COSTS_N_INSNS (1); } + if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) + return COSTS_N_INSNS (1) - 2; + else if (REG_P (plus0) && REG_P (plus1)) + return COSTS_N_INSNS (1); /* For other 'plus' situation, make it cost 4-byte. */ return COSTS_N_INSNS (1); @@ -249,4 +643,84 @@ size_cost: return COSTS_N_INSNS (4); } + +int nds32_address_cost_size_prefer (rtx address) +{ + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + + switch (code) + { + case POST_MODIFY: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains + POST_MODIFY/POST_INC/POST_DEC behavior. */ + return 0; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ + return COSTS_N_INSNS (2); + + case REG: + /* Simply return 4-byte costs. */ + return COSTS_N_INSNS (1) - 1; + + case PLUS: + /* We do not need to check if the address is a legitimate address, + because this hook is never called with an invalid address. + But we better check the range of + const_int value for cost, if it exists. 
*/ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) + { + /* If it is possible to be lwi333/swi333 form, + make it 2-byte cost. */ + if (satisfies_constraint_Iu03 (plus1)) + return (COSTS_N_INSNS (1) - 2); + else + return COSTS_N_INSNS (1) - 1; + } + + /* (plus (reg) (mult (reg) (const))) */ + if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1)) + return (COSTS_N_INSNS (1) - 1); + + /* For other 'plus' situation, make it cost 4-byte. */ + return COSTS_N_INSNS (1); + + default: + break; + } + + return COSTS_N_INSNS (4); + +} + +int nds32_address_cost_impl (rtx address, + machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed_p) +{ + if (speed_p) + { + if (current_pass->tv_id == TV_FWPROP) + return nds32_address_cost_speed_fwprop (address); + else + return nds32_address_cost_speed_prefer (address); + } + else + return nds32_address_cost_size_prefer (address); +} + /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md index 7df715a..7ee6489 100644 --- a/gcc/config/nds32/nds32-doubleword.md +++ b/gcc/config/nds32/nds32-doubleword.md @@ -118,10 +118,28 @@ ]) (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) +;; Split move_di pattern when the hard register is odd. 
+(define_split + [(set (match_operand:DIDF 0 "register_operand" "") + (match_operand:DIDF 1 "register_operand" ""))] + "(NDS32_IS_GPR_REGNUM (REGNO (operands[0])) + && ((REGNO (operands[0]) & 0x1) == 1)) + || (NDS32_IS_GPR_REGNUM (REGNO (operands[1])) + && ((REGNO (operands[1]) & 0x1) == 1))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + { + operands[2] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[0]); + operands[3] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[1]); + } +) + (define_split [(set (match_operand:DIDF 0 "register_operand" "") (match_operand:DIDF 1 "const_double_operand" ""))] - "reload_completed" + "flag_pic || reload_completed" [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))] { diff --git a/gcc/config/nds32/nds32-dspext.md b/gcc/config/nds32/nds32-dspext.md new file mode 100644 index 0000000..4c643a7 --- /dev/null +++ b/gcc/config/nds32/nds32-dspext.md @@ -0,0 +1,5278 @@ +;; Machine description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_expand "mov" + [(set (match_operand:VQIHI 0 "general_operand" "") + (match_operand:VQIHI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (mode, operands[1]); + + /* If operands[1] is a large constant and cannot be performed + by a single instruction, we need to split it. */ + if (GET_CODE (operands[1]) == CONST_VECTOR + && !satisfies_constraint_CVs2 (operands[1]) + && !satisfies_constraint_CVhi (operands[1])) + { + HOST_WIDE_INT ival = const_vector_to_hwint (operands[1]); + rtx tmp_rtx; + + tmp_rtx = can_create_pseudo_p () + ? gen_reg_rtx (SImode) + : simplify_gen_subreg (SImode, operands[0], mode, 0); + + emit_move_insn (tmp_rtx, gen_int_mode (ival, SImode)); + convert_move (operands[0], tmp_rtx, false); + DONE; + } + + if (REG_P (operands[0]) && SYMBOLIC_CONST_P (operands[1])) + { + if (nds32_tls_referenced_p (operands [1])) + { + nds32_expand_tls_move (operands); + DONE; + } + else if (flag_pic) + { + nds32_expand_pic_move (operands); + DONE; + } + } +}) + +(define_insn "*mov" + [(set (match_operand:VQIHI 0 "nonimmediate_operand" "=r, r,$U45,$U33,$U37,$U45, m,$ l,$ l,$ l,$ d, d, r,$ d, r, r, r, *f, *f, r, *f, Q") + (match_operand:VQIHI 1 "nds32_vmove_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45,Ufe, m, CVp5, CVs5, CVs2, CVhi, *f, r, *f, Q, *f"))] + "NDS32_EXT_DSP_P () + && (register_operand(operands[0], mode) + || register_operand(operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "mov55\t%0, %1"; + case 1: + return "ori\t%0, %1, 0"; + case 2: + case 3: + case 4: + case 5: + return nds32_output_16bit_store (operands, ); + case 6: + return nds32_output_32bit_store (operands, ); + case 7: + case 8: + case 9: + case 10: + case 11: + return nds32_output_16bit_load (operands, ); + case 12: + return nds32_output_32bit_load (operands, ); + case 13: + return "movpi45\t%0, %1"; + case 14: + return 
"movi55\t%0, %1"; + case 15: + return "movi\t%0, %1"; + case 16: + return "sethi\t%0, hi20(%1)"; + case 17: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 18: + return "fmtsr\t%1, %0"; + case 19: + return "fmfsr\t%0, %1"; + case 20: + return nds32_output_float_load (operands); + case 21: + return nds32_output_float_store (operands); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_operand" "") + (match_operand:V2SI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (V2SImode, operands[1]); +}) + +(define_insn "*movv2si" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=r, r, r, r, Da, m, f, Q, f, r, f") + (match_operand:V2SI 1 "general_operand" " r, i, Da, m, r, r, Q, f, f, f, r"))] + "NDS32_EXT_DSP_P () + && (register_operand(operands[0], V2SImode) + || register_operand(operands[1], V2SImode))" +{ + switch (which_alternative) + { + case 0: + return "movd44\t%0, %1"; + case 1: + /* reg <- const_int, we ask gcc to split instruction. */ + return "#"; + case 2: + /* The memory format is (mem (reg)), + we can generate 'lmw.bi' instruction. */ + return nds32_output_double (operands, true); + case 3: + /* We haven't 64-bit load instruction, + we split this pattern to two SImode pattern. */ + return "#"; + case 4: + /* The memory format is (mem (reg)), + we can generate 'smw.bi' instruction. 
*/ + return nds32_output_double (operands, false); + case 5: + /* We haven't 64-bit store instruction, + we split this pattern to two SImode pattern. */ + return "#"; + case 6: + return nds32_output_float_load (operands); + case 7: + return nds32_output_float_store (operands); + case 8: + return "fcpysd\t%0, %1, %1"; + case 9: + return "fmfdr\t%0, %1"; + case 10: + return "fmtdr\t%1, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load,load,store,store,unknown,unknown,unknown,unknown,unknown") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!TARGET_16_BIT") + (const_int 4) + (const_int 2)) + ;; Alternative 1 + (const_int 16) + ;; Alternative 2 + (const_int 4) + ;; Alternative 3 + (const_int 8) + ;; Alternative 4 + (const_int 4) + ;; Alternative 5 + (const_int 8) + ;; Alternative 6 + (const_int 4) + ;; Alternative 7 + (const_int 4) + ;; Alternative 8 + (const_int 4) + ;; Alternative 9 + (const_int 4) + ;; Alternative 10 + (const_int 4) + ]) + (set_attr "feature" " v1, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) + +(define_expand "movmisalign" + [(set (match_operand:VQIHI 0 "general_operand" "") + (match_operand:VQIHI 1 "general_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + rtx addr; + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (mode, operands[1]); + + if (MEM_P (operands[0])) + { + addr = force_reg (Pmode, XEXP (operands[0], 0)); + emit_insn (gen_unaligned_store (addr, operands[1])); + } + else + { + addr = force_reg (Pmode, XEXP (operands[1], 0)); + emit_insn (gen_unaligned_load (operands[0], addr)); + } + DONE; +}) + +(define_expand "unaligned_load" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (unspec:VQIHI [(mem:VQIHI (match_operand:SI 1 "register_operand" "r"))] UNSPEC_UALOAD_W))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_load (operands, mode); + else + emit_insn (gen_unaligned_load_w (operands[0], gen_rtx_MEM (mode, 
operands[1]))); + DONE; +}) + +(define_insn "unaligned_load_w" + [(set (match_operand:VQIHI 0 "register_operand" "= r") + (unspec:VQIHI [(match_operand:VQIHI 1 "nds32_lmw_smw_base_operand" " Umw")] UNSPEC_UALOAD_W))] + "NDS32_EXT_DSP_P ()" +{ + return nds32_output_lmw_single_word (operands); +} + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_expand "unaligned_store" + [(set (mem:VQIHI (match_operand:SI 0 "register_operand" "r")) + (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" "r")] UNSPEC_UASTORE_W))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_ISA_V3M) + nds32_expand_unaligned_store (operands, mode); + else + emit_insn (gen_unaligned_store_w (gen_rtx_MEM (mode, operands[0]), operands[1])); + DONE; +}) + +(define_insn "unaligned_store_w" + [(set (match_operand:VQIHI 0 "nds32_lmw_smw_base_operand" "=Umw") + (unspec:VQIHI [(match_operand:VQIHI 1 "register_operand" " r")] UNSPEC_UASTORE_W))] + "NDS32_EXT_DSP_P ()" +{ + return nds32_output_smw_single_word (operands); +} + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "add3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (all_plus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "add %0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "adddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (all_plus:DI (match_operand:DI 1 "register_operand" " r") + (match_operand:DI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "add64 %0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "raddv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (ashiftrt:V4HI + (plus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + 
"NDS32_EXT_DSP_P ()" + "radd8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + + +(define_insn "uraddv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (lshiftrt:V4HI + (plus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "raddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (ashiftrt:V2SI + (plus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "uraddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (lshiftrt:V2SI + (plus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "radddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (ashiftrt:TI + (plus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) + (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "radd64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + + +(define_insn "uradddi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (plus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) + (zero_extend:TI (match_operand:DI 2 
"register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "uradd64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "sub3" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (all_minus:VQIHI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sub %0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (all_minus:DI (match_operand:DI 1 "register_operand" " r") + (match_operand:DI 2 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sub64 %0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +(define_insn "rsubv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (ashiftrt:V4HI + (minus:V4HI (sign_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "ursubv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (truncate:V4QI + (lshiftrt:V4HI + (minus:V4HI (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" " r")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub8\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "rsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (ashiftrt:V2SI + (minus:V2SI (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + 
+(define_insn "ursubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI + (lshiftrt:V2SI + (minus:V2SI (zero_extend:V2SI (match_operand:V2HI 1 "register_operand" " r")) + (zero_extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "rsubdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (ashiftrt:TI + (minus:TI (sign_extend:TI (match_operand:DI 1 "register_operand" " r")) + (sign_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "rsub64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4")]) + + +(define_insn "ursubdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (minus:TI (zero_extend:TI (match_operand:DI 1 "register_operand" " r")) + (zero_extend:TI (match_operand:DI 2 "register_operand" " r"))) + (const_int 1))))] + "NDS32_EXT_DSP_P ()" + "ursub64\t%0, %1, %2" + [(set_attr "type" "dalu64") + (set_attr "length" "4")]) + +(define_expand "cras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_cras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_cras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "cras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel 
[(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "cras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "cras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "cras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "kcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_kcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "kcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "kcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "kcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + 
(vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "kcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "ukcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_ukcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_ukcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "ukcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "ukcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "ukcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "ukcras16\t%0, %1, %2" + [(set_attr "type" 
"dalu")] +) + +(define_expand "crsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_crsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_crsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "crsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "crsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "crsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "crsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "kcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_kcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; 
+}) + +(define_insn "kcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "kcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "kcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (ss_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "kcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "ukcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_ukcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_ukcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "ukcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + 
(us_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "ukcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_insn "ukcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (us_minus:HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (us_plus:HI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])) + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "ukcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +(define_expand "rcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_rcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "rcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + 
"rcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* Big-endian rcras16: merges (op1[1] - op2[0]) >> 1 and (op2[1] + op1[0]) >> 1 under mask 2; lanes are sign-extended to SImode, shifted arithmetically, then truncated to HImode. */ +(define_insn "rcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* Expander for urcras16: emits the _be or _le insn depending on TARGET_BIG_ENDIAN. */ +(define_expand "urcras16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_urcras16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_urcras16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +/* Little-endian urcras16: merges (op1[0] - op2[1]) >> 1 and (op2[0] + op1[1]) >> 1 under mask 1; lanes are zero-extended to SImode, shifted logically, then truncated to HImode. */ +(define_insn "urcras16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "urcras16\t%0, %1, %2" + [(set_attr 
"type" "dalu")] +) + +/* Big-endian urcras16: merges (op1[1] - op2[0]) >> 1 and (op2[1] + op1[0]) >> 1 under mask 2, using zero-extension and a logical shift. */ +(define_insn "urcras16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "urcras16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* Expander for rcrsa16: emits the _be or _le insn depending on TARGET_BIG_ENDIAN. */ +(define_expand "rcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_rcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_rcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +/* Little-endian rcrsa16: merges (op1[1] - op2[0]) >> 1 and (op1[0] + op2[1]) >> 1 under mask 2, using sign-extension and an arithmetic shift. */ +(define_insn "rcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* Big-endian rcrsa16: merges (op1[0] - op2[1]) >> 1 and (op1[1] + op2[0]) >> 1 under mask 1, using sign-extension and an arithmetic shift. */ +(define_insn 
"rcrsa16_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (minus:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (ashiftrt:SI + (plus:SI + (sign_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* Expander for urcrsa16: emits the _be or _le insn depending on TARGET_BIG_ENDIAN. */ +(define_expand "urcrsa16_1" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_urcrsa16_1_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_urcrsa16_1_le (operands[0], operands[1], operands[2])); + DONE; +}) + +/* Little-endian urcrsa16: merges (op1[1] - op2[0]) >> 1 and (op1[0] + op2[1]) >> 1 under mask 2, using zero-extension and a logical shift. */ +(define_insn "urcrsa16_1_le" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (const_int 1)))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "urcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* Big-endian urcrsa16: merges (op1[0] - op2[1]) >> 1 and (op1[1] + op2[0]) >> 1 under mask 1, using zero-extension and a logical shift. */ +(define_insn "urcrsa16_1_be" + [(set 
(match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (minus:SI + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (const_int 1)))) + (vec_duplicate:V2HI + (truncate:HI + (lshiftrt:SI + (plus:SI + (zero_extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (const_int 1)))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "urcrsa16\t%0, %1, %2" + [(set_attr "type" "dalu")] +) + +/* V2HI shift expander over `shifts` (presumably a code iterator defined elsewhere -- confirm): a shift by constant zero is emitted as a plain move, every other case falls through to the matching insn pattern. */ +(define_expand "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "") + (shifts:V2HI (match_operand:V2HI 1 "register_operand" "") + (match_operand:SI 2 "nds32_rimm4u_operand" "")))] + "NDS32_EXT_DSP_P ()" +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +/* V2HI left shift: slli16 for an Iu04 immediate count, sll16 for a register count. */ +(define_insn "*ashlv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + slli16\t%0, %1, %2 + sll16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +/* V2HI saturating left shift (ss_ashift): kslli16 for an immediate count, ksll16 for a register count. */ +(define_insn "kslli16" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ss_ashift:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + kslli16\t%0, %1, %2 + ksll16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +/* V2HI arithmetic right shift: srai16 for an immediate count, sra16 for a register count. */ +(define_insn "*ashrv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + 
"@ + srai16\t%0, %1, %2 + sra16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +/* Arithmetic right shift wrapped in UNSPEC_ROUND; emits the ".u" forms of srai16/sra16. */ +(define_insn "sra16_round" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srai16.u\t%0, %1, %2 + sra16.u\t%0, %1, %2" + [(set_attr "type" "daluround,daluround") + (set_attr "length" " 4, 4")]) + +/* V2HI logical right shift: srli16 for an immediate count, srl16 for a register count. */ +(define_insn "*lshrv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r")))] + "NDS32_EXT_DSP_P ()" + "@ + srli16\t%0, %1, %2 + srl16\t%0, %1, %2" + [(set_attr "type" "dalu,dalu") + (set_attr "length" " 4, 4")]) + +/* Logical right shift wrapped in UNSPEC_ROUND; emits the ".u" forms of srli16/srl16. */ +(define_insn "srl16_round" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (unspec:V2HI [(lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm4u_operand" " Iu04, r"))] + UNSPEC_ROUND))] + "NDS32_EXT_DSP_P ()" + "@ + srli16.u\t%0, %1, %2 + srl16.u\t%0, %1, %2" + [(set_attr "type" "daluround,daluround") + (set_attr "length" " 4, 4")]) + +/* Shift by a signed register amount: when op2 < 0 the vector is shifted right arithmetically by -op2, otherwise it is shifted left by op2. */ +(define_insn "kslra16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (if_then_else:V2HI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") + (neg:SI (match_dup 2))) + (ashift:V2HI (match_dup 1) + (match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslra16\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +/* Same selection as kslra16, with the right-shift arm wrapped in UNSPEC_ROUND; emits kslra16.u. */ +(define_insn "kslra16_round" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (if_then_else:V2HI + (lt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 0)) + (unspec:V2HI [(ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" " r") + (neg:SI (match_dup 2)))] + UNSPEC_ROUND) + (ashift:V2HI (match_dup 1) + 
(match_dup 2))))] + "NDS32_EXT_DSP_P ()" + "kslra16.u\t%0, %1, %2" + [(set_attr "type" "daluround") + (set_attr "length" "4")]) + +/* Vector compares over VQIHI (presumably a mode iterator defined elsewhere -- confirm). Each comparison is wrapped in UNSPEC_VEC_COMPARE and the result is written to an SImode register. This one is the equality compare. */ +(define_insn "cmpeq" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(eq:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "cmpeq\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +/* Signed less-than compare. */ +(define_insn "scmplt" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(lt:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "scmplt\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +/* Signed less-than-or-equal compare. */ +(define_insn "scmple" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(le:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "scmple\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +/* Unsigned less-than compare. */ +(define_insn "ucmplt" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(ltu:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "ucmplt\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +/* Unsigned less-than-or-equal compare. */ +(define_insn "ucmple" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(leu:SI (match_operand:VQIHI 1 "register_operand" " r") + (match_operand:VQIHI 2 "register_operand" " r"))] + UNSPEC_VEC_COMPARE))] + "NDS32_EXT_DSP_P ()" + "ucmple\t%0, %1, %2" + [(set_attr "type" "dcmp") + (set_attr "length" "4")]) + +/* sclip16: opaque UNSPEC_CLIPS of a V2HI register and a 4-bit unsigned immediate. */ +(define_insn "sclip16" + [(set (match_operand:V2HI 0 "register_operand" "= r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] + UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + 
"sclip16\t%0, %1, %2" + [(set_attr "type" "dclip") + (set_attr "length" "4")]) + +/* uclip16: opaque UNSPEC_CLIP of a V2HI register and a 4-bit unsigned immediate. */ +(define_insn "uclip16" + [(set (match_operand:V2HI 0 "register_operand" "= r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm4u_operand" " Iu04")] + UNSPEC_CLIP))] + "NDS32_EXT_DSP_P ()" + "uclip16\t%0, %1, %2" + [(set_attr "type" "dclip") + (set_attr "length" "4")]) + +/* khm16: opaque UNSPEC_KHM of two V2HI registers. */ +(define_insn "khm16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:V2HI 2 "register_operand" " r")] + UNSPEC_KHM))] + "NDS32_EXT_DSP_P ()" + "khm16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +/* khmx16: opaque UNSPEC_KHMX of two V2HI registers. */ +(define_insn "khmx16" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" " r") + (match_operand:V2HI 2 "register_operand" " r")] + UNSPEC_KHMX))] + "NDS32_EXT_DSP_P ()" + "khmx16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +/* Standard vec_set expander for V4QI: stores QI operand 1 into lane operand 2 of vector operand 0 by building the one-hot merge mask (1 << lane) for vec_setv4qi_internal. */ +(define_expand "vec_setv4qi" + [(match_operand:V4QI 0 "register_operand" "") + (match_operand:QI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + HOST_WIDE_INT pos = INTVAL (operands[2]); + /* V4QI has four lanes, so only indices 0..3 are valid; any other value would produce a shift count or merge mask that the internal pattern cannot accept. */ + if (pos < 0 || pos > 3) + gcc_unreachable (); + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; + emit_insn (gen_vec_setv4qi_internal (operands[0], operands[1], + operands[0], GEN_INT (elem))); + DONE; +}) + +/* insb expander: converts SI operand 2 to QImode and inserts it into byte lane operand 3 (0..3) of V4QI operand 1, writing the result to operand 0. */ +(define_expand "insb" + [(match_operand:V4QI 0 "register_operand" "") + (match_operand:V4QI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[3]) > 3 || INTVAL (operands[3]) < 0) + gcc_unreachable (); + + rtx src = gen_reg_rtx (QImode); + + convert_move (src, operands[2], false); + + HOST_WIDE_INT selector_index; + /* Big endian need reverse index. 
*/ + if (TARGET_BIG_ENDIAN) + selector_index = 4 - INTVAL (operands[3]) - 1; + else + selector_index = INTVAL (operands[3]); + /* One-hot lane mask passed as the merge selector of vec_setv4qi_internal. */ + rtx selector = gen_int_mode (1 << selector_index, SImode); + emit_insn (gen_vec_setv4qi_internal (operands[0], src, + operands[1], selector)); + DONE; +}) + +/* Bit-field insert expander: only 8-bit wide fields are handled, any other width FAILs. */ +(define_expand "insvsi" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "nds32_insv_operand" "")) + (match_operand:SI 3 "register_operand" ""))] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[1]) != 8) + FAIL; +} + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + + +/* Stores SI register op2 into the 8-bit field of op0 at position op1 with insb; %v1 prints the position through the 'v' operand modifier (defined elsewhere -- presumably converts it to a byte index, confirm). */ +(define_insn "insvsi_internal" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 8) + (match_operand:SI 1 "nds32_insv_operand" "i")) + (match_operand:SI 2 "register_operand" "r"))] + "NDS32_EXT_DSP_P ()" + "insb\t%0, %2, %v1" + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +/* Same insertion as insvsi_internal, with a zero-extended QI register as the source. */ +(define_insn "insvsiqi_internal" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 8) + (match_operand:SI 1 "nds32_insv_operand" "i")) + (zero_extend:SI (match_operand:QI 2 "register_operand" "r")))] + "NDS32_EXT_DSP_P ()" + "insb\t%0, %2, %v1" + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +;; Intermediate pattern for synthesizing insvsiqi_internal +;; v0 = ((v1 & 0xff) << 8) +(define_insn_and_split "and0xff_s8" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (const_int 8)) + (const_int 65280)))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + /* The low 8 bits are already zero after the shift, so masking with 0xffff keeps the same bits as the 0xff00 (65280) mask in the pattern. */ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, operands[1], gen_int_mode (8, SImode))); + emit_insn (gen_andsi3 (operands[0], tmp, gen_int_mode (0xffff, SImode))); + DONE; +}) + +;; v0 = (v1 & 0xff00ffff) | ((v2 << 16) & 0xff0000) +(define_insn_and_split "insbsi2" + [(set 
(match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0") + (const_int -16711681)) + (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (const_int 16711680))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + /* -16711681 is 0xff00ffff and 16711680 is 0x00ff0000: the pattern replaces bits 16..23 of op1 with the low byte of op2. The split copies op1 into a fresh pseudo, inserts the byte there at bit position 16, and then moves the result to op0. */ + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, operands[1]); + emit_insn (gen_insvsi_internal (tmp, gen_int_mode(16, SImode), operands[2])); + emit_move_insn (operands[0], tmp); + DONE; +}) + +;; v0 = (v1 & 0xff00ffff) | v2 +(define_insn_and_split "ior_and0xff00ffff_reg" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16711681)) + (match_operand:SI 2 "register_operand" "r")))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + /* Split into an explicit AND with 0xff00ffff followed by an IOR with op2. */ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_andsi3 (tmp, operands[1], gen_int_mode (0xff00ffff, SImode))); + emit_insn (gen_iorsi3 (operands[0], tmp, operands[2])); + DONE; +}) + +/* Inserts QI op1 into one byte lane of V4QI op2 (tied to op0). The four alternatives correspond to merge masks 1, 2, 4 and 8; the insb byte index is 0..3 for little endian and mirrored (3..0) for big endian. */ +(define_insn "vec_setv4qi_internal" + [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (match_operand:QI 1 "register_operand" " r, r, r, r")) + (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") + (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "insb\t%0, %1, 3", + "insb\t%0, %1, 2", + "insb\t%0, %1, 1", + "insb\t%0, %1, 0" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "insb\t%0, %1, 0", + "insb\t%0, %1, 1", + "insb\t%0, %1, 2", + "insb\t%0, %1, 3" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +/* Little-endian only variant of vec_setv4qi_internal whose inserted byte is lane 0 of another V4QI register. */ +(define_insn "vec_setv4qi_internal_vec" + [(set (match_operand:V4QI 0 "register_operand" "= r, r, r, r") + 
(vec_merge:V4QI + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r, r, r, r") + (parallel [(const_int 0)]))) + (match_operand:V4QI 2 "register_operand" " 0, 0, 0, 0") + (match_operand:SI 3 "nds32_imm_1_2_4_8_operand" " Iv01, Iv02, Iv04, Iv08")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + insb\t%0, %1, 0 + insb\t%0, %1, 1 + insb\t%0, %1, 2 + insb\t%0, %1, 3" + [(set_attr "type" "dinsb") + (set_attr "length" "4")]) + +/* Lane 0 of op1 merged into an all-zero V4QI under mask 1 (lane 0 from the duplicate, lanes 1..3 from zero): a byte zero-extension, emitted as zeb33/zeb. */ +(define_insn "vec_mergev4qi_and_cv0_1" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Same zero-extension with the merge operands swapped. The zero vector is now the first operand, so it must supply lanes 1..3 and the mask is 14 (0b1110). The two-lane mask value 2 would take lanes 2 and 3 from the duplicated byte, which zeb does not compute. */ +(define_insn "vec_mergev4qi_and_cv0_2" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (vec_duplicate:V4QI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_int 14)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Scalar QI op1 merged into an all-zero V4QI under mask 1: a byte zero-extension, emitted as zeb33/zeb. */ +(define_insn "vec_mergeqi_and_cv0_1" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) + (const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Operand-swapped form of vec_mergeqi_and_cv0_1; as above, the zero vector has to supply lanes 1..3, hence mask 14. */ +(define_insn "vec_mergeqi_and_cv0_2" + [(set (match_operand:V4QI 0 "register_operand" "=$l,r") + (vec_merge:V4QI + 
(const_vector:V4QI [ + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (vec_duplicate:V4QI (match_operand:QI 1 "register_operand" " l,r")) + (const_int 14)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeb33\t%0, %1 + zeb\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Standard vec_set expander for V2HI: stores HI operand 1 into lane operand 2 of vector operand 0 by building the one-hot merge mask (1 << lane) for vec_setv2hi_internal. */ +(define_expand "vec_setv2hi" + [(match_operand:V2HI 0 "register_operand" "") + (match_operand:HI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + HOST_WIDE_INT pos = INTVAL (operands[2]); + /* V2HI has two lanes, so only indices 0 and 1 are valid; any other value would produce a shift count or merge mask that the internal pattern cannot accept. */ + if (pos < 0 || pos > 1) + gcc_unreachable (); + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << pos; + emit_insn (gen_vec_setv2hi_internal (operands[0], operands[1], + operands[0], GEN_INT (elem))); + DONE; +}) + +/* Replaces one halfword lane of V2HI op2 with HI op1. Mask 1 or 2 picks the lane; the pack instruction and its operand order are chosen per endianness. */ +(define_insn "vec_setv2hi_internal" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r")) + (match_operand:V2HI 2 "register_operand" " r, r") + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pkbb16\t%0, %1, %2", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pktb16\t%0, %2, %1", + "pkbb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Lane 0 of op1 merged into an all-zero V2HI under mask 1: a halfword zero-extension, emitted as zeh33/zeh. */ +(define_insn "vec_mergev2hi_and_cv0_1" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Operand-swapped form: the zero vector supplies lane 1 (mask 2) and the duplicate supplies lane 0. */ +(define_insn "vec_mergev2hi_and_cv0_2" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + 
(const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " l,r") + (parallel [(const_int 0)]))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Scalar HI op1 merged into an all-zero V2HI under mask 1: a halfword zero-extension, emitted as zeh33/zeh. */ +(define_insn "vec_mergehi_and_cv0_1" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Operand-swapped form of vec_mergehi_and_cv0_1: the zero vector supplies lane 1 (mask 2). */ +(define_insn "vec_mergehi_and_cv0_2" + [(set (match_operand:V2HI 0 "register_operand" "=$l,r") + (vec_merge:V2HI + (const_vector:V2HI [ + (const_int 0) + (const_int 0)]) + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" " l,r")) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + zeh33\t%0, %1 + zeh\t%0, %1" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* pkbb expander: forwards to vec_mergevv with the (mask, select-1, select-2) triple that makes it emit "pkbb16 %0, %1, %2" for the current endianness. */ +(define_expand "pkbb" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1), GEN_INT (1))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (0), GEN_INT (0))); + } + DONE; +}) + +/* SImode spelling of pkbb16: (op1 & 0xffff) | (op2 << 16). */ +(define_insn "pkbbsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535)) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Same value as pkbbsi_1 with the IOR operands in the opposite order. */ +(define_insn "pkbbsi_2" + [(set 
(match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* pkbb16 where the low half is written as a zero-extended HI register: zero_extend (op1) | (op2 << 16). */ +(define_insn "pkbbsi_3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Same value as pkbbsi_3 with the IOR operands in the opposite order. */ +(define_insn "pkbbsi_4" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16)) + (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))] + "NDS32_EXT_DSP_P ()" + "pkbb16\t%0, %2, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +;; v0 = (v1 & 0xffff0000) | (v2 & 0xffff) +(define_insn "pktbsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -65536)) + (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Same value as pktbsi_1 with the low half written as (op2 & 0xffff). It emits pktb16 like its siblings, so it carries the same "dpack" type. */ +(define_insn "pktbsi_2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -65536)) + (and:SI (match_operand:SI 2 "register_operand" "r") + (const_int 65535))))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Bit-field store into bits 0..15 of op0 from an SI register, emitted as pktb16 with op0 as both destination and first source. */ +(define_insn "pktbsi_3" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 16 ) + (const_int 0)) + (match_operand:SI 1 "register_operand" " r"))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Same bit-field store as pktbsi_3 with a zero-extended HI register as the source. */ +(define_insn 
"pktbsi_4" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r") + (const_int 16 ) + (const_int 0)) + (zero_extend:SI (match_operand:HI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "pktb16\t%0, %0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* SImode spelling of pktt16: (op1 & 0xffff0000) | (op2 >> 16), logical shift. */ +(define_insn "pkttsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (and:SI (match_operand:SI 1 "register_operand" " r") + (const_int -65536)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") + (const_int 16))))] + "NDS32_EXT_DSP_P ()" + "pktt16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* pkbt expander: forwards to vec_mergevv with the (mask, select-1, select-2) triple that makes it emit "pkbt16 %0, %1, %2" for the current endianness. */ +(define_expand "pkbt" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1), GEN_INT (0))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (0), GEN_INT (1))); + } + DONE; +}) + +/* pktt expander: same scheme, selecting the "pktt16 %0, %1, %2" alternative of vec_mergevv. */ +(define_expand "pktt" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0), GEN_INT (0))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), GEN_INT (1), GEN_INT (1))); + } + DONE; +}) + +/* pktb expander: same scheme, selecting the "pktb16 %0, %1, %2" alternative of vec_mergevv. */ +(define_expand "pktb" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0), GEN_INT (1))); + } + else + { + emit_insn (gen_vec_mergevv (operands[0], operands[1], operands[2], + GEN_INT (2), 
GEN_INT (1), GEN_INT (0))); + } + DONE; +}) + +/* Little-endian merge of two scalar HI registers into a V2HI: mask 1 or 2 decides which scalar lands in which lane, expressed by swapping the pkbb16 source operands. */ +(define_insn "vec_mergerr" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r")) + (vec_duplicate:V2HI + (match_operand:HI 2 "register_operand" " r, r")) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pkbb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + + +/* Lane-wise merge of two V2HI registers under mask 1 or 2, emitted as pktb16 with the source order chosen from the mask and the endianness. */ +(define_insn "vec_merge" + [(set (match_operand:V2HI 0 "register_operand" "= r, r") + (vec_merge:V2HI + (match_operand:V2HI 1 "register_operand" " r, r") + (match_operand:V2HI 2 "register_operand" " r, r") + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pktb16\t%0, %1, %2", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pktb16\t%0, %2, %1", + "pktb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Little-endian merge of a scalar HI (op1) with a selected lane (op4) of V2HI op2; the four alternatives enumerate lane select 0/1 for each of masks 1 and 2. */ +(define_insn "vec_mergerv" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 1 "register_operand" " r, r, r, r")) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pktb16\t%0, %2, %1 + pkbb16\t%0, %1, %2 + pkbt16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Little-endian merge of a selected lane (op4) of V2HI op1 with a scalar HI (op2); mirror image of vec_mergerv. */ +(define_insn "vec_mergevr" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + 
(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv00, Iv01")]))) + (vec_duplicate:V2HI + (match_operand:HI 2 "register_operand" " r, r, r, r")) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv02, Iv02")))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + pkbb16\t%0, %2, %1 + pkbt16\t%0, %2, %1 + pkbb16\t%0, %1, %2 + pktb16\t%0, %1, %2" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* General two-vector halfword pack: merges lane op4 of op1 with lane op5 of op2 under mask op3. The eight alternatives enumerate every (select-1, select-2, mask) combination, and the output table maps each one to pkbb16/pkbt16/pktb16/pktt16 with the appropriate source order for the current endianness. */ +(define_insn "vec_mergevv" + [(set (match_operand:V2HI 0 "register_operand" "= r, r, r, r, r, r, r, r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r, r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01")]))) + (vec_duplicate:V2HI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r, r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00, Iv00, Iv01, Iv01, Iv00")]))) + (match_operand:SI 3 "nds32_imm_1_2_operand" " Iv01, Iv01, Iv01, Iv01, Iv02, Iv02, Iv02, Iv02")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "pktt16\t%0, %1, %2", + "pktb16\t%0, %1, %2", + "pkbb16\t%0, %1, %2", + "pkbt16\t%0, %1, %2", + "pktt16\t%0, %2, %1", + "pkbt16\t%0, %2, %1", + "pkbb16\t%0, %2, %1", + "pktb16\t%0, %2, %1" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "pkbb16\t%0, %2, %1", + "pktb16\t%0, %2, %1", + "pktt16\t%0, %2, %1", + "pkbt16\t%0, %2, %1", + "pkbb16\t%0, %1, %2", + "pkbt16\t%0, %1, %2", + "pktt16\t%0, %1, %2", + "pktb16\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +/* Standard vec_extract expander for V4QI (little endian only): validates the lane index and leaves the emitted set to be matched by the vec_extractv4qi<N> patterns. */ +(define_expand "vec_extractv4qi" + [(set (match_operand:QI 0 "register_operand" "") + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" "") + (parallel [(match_operand:SI 2 "const_int_operand" "")])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" 
+{ + /* Only lanes 0..3 exist in a V4QI. */ + if (INTVAL (operands[2]) != 0 + && INTVAL (operands[2]) != 1 + && INTVAL (operands[2]) != 2 + && INTVAL (operands[2]) != 3) + gcc_unreachable (); + + /* Only the lane-0 pattern accepts a memory source, so a non-zero lane of a memory vector must FAIL. The check has to look at the source (operand 1); operand 0 is a register_operand and can never be a MEM. */ + if (INTVAL (operands[2]) != 0 && MEM_P (operands[1])) + FAIL; +}) + +/* Lane 0 extraction: zeb33/zeb from a register, or a load when the vector is in memory. */ +(define_insn "vec_extractv4qi0" + [(set (match_operand:QI 0 "register_operand" "=l,r,r") + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeb33\t%0, %1"; + case 1: + return "zeb\t%0, %1"; + case 2: + return nds32_output_32bit_load (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 0 extraction zero-extended to SImode; same three alternatives as vec_extractv4qi0. */ +(define_insn "vec_extractv4qi0_ze" + [(set (match_operand:SI 0 "register_operand" "=l,r,r") + (zero_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeb33\t%0, %1"; + case 1: + return "zeb\t%0, %1"; + case 2: + return nds32_output_32bit_load (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 0 extraction sign-extended to SImode: seb33/seb from a register, or the "_s" load helper for memory. */ +(define_insn "vec_extractv4qi0_se" + [(set (match_operand:SI 0 "register_operand" "=l,r,r") + (sign_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seb33\t%0, %1"; + case 1: + return "seb\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 1); + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 1 extraction: split before reload into gen_rotrv4qi_1 followed by a lane-0 extract. */ +(define_insn_and_split "vec_extractv4qi1" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && 
!reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + /* gen_rotrv4qi_1 is defined elsewhere; presumably it rotates lane 1 down to lane 0 -- confirm. */ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_1 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 2 extraction: split before reload into gen_rotrv4qi_2 followed by a lane-0 extract. */ +(define_insn_and_split "vec_extractv4qi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_2 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 3 extraction: split before reload into gen_rotrv4qi_3 followed by a lane-0 extract. */ +(define_insn_and_split "vec_extractv4qi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_rotrv4qi_3 (tmp, operands[1])); + emit_insn (gen_vec_extractv4qi0 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Sign-extended lane 3 (little endian): an arithmetic right shift of the whole register by 24. */ +(define_insn "vec_extractv4qi3_se" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (sign_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " 0,r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 24 + srai\t%0, %1, 24" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Zero-extended lane 3 (little endian): a logical right shift of the whole register by 24. */ +(define_insn "vec_extractv4qi3_ze" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (zero_extend:SI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " 0,r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && 
!TARGET_BIG_ENDIAN" + "@ + srli45\t%0, 24 + srli\t%0, %1, 24" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +/* Lane 0 sign-extended to HImode: split before reload into a QI lane extract followed by extendqihi2. */ +(define_insn_and_split "vec_extractv4qihi0" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi0 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 1 sign-extended to HImode: same split scheme using vec_extractv4qi1. */ +(define_insn_and_split "vec_extractv4qihi1" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi1 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 2 sign-extended to HImode: same split scheme using vec_extractv4qi2. */ +(define_insn_and_split "vec_extractv4qihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi2 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +/* Lane 3 sign-extended to HImode: same split scheme using vec_extractv4qi3. */ +(define_insn_and_split "vec_extractv4qihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel 
[(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx tmp = gen_reg_rtx (QImode); + emit_insn (gen_vec_extractv4qi3 (tmp, operands[1])); + emit_insn (gen_extendqihi2 (operands[0], tmp)); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_expand "vec_extractv2hi" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" "") + (parallel [(match_operand:SI 2 "const_int_operand" "")])))] + "NDS32_EXT_DSP_P ()" +{ + if (INTVAL (operands[2]) != 0 + && INTVAL (operands[2]) != 1) + gcc_unreachable (); + + if (INTVAL (operands[2]) != 0 && MEM_P (operands[0])) + FAIL; +}) + +(define_insn "vec_extractv2hi0" + [(set (match_operand:HI 0 "register_operand" "=$l,r,r") + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "vec_extractv2hi0_ze" + [(set (match_operand:SI 0 "register_operand" "=$l, r,$ l, *r") + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l, r, U33, m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "zeh33\t%0, %1"; + case 1: + return "zeh\t%0, %1"; + case 2: + return nds32_output_16bit_load (operands, 2); + case 3: + return nds32_output_32bit_load (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load,load") + (set_attr "length" " 2, 4, 2, 4")]) + +(define_insn "vec_extractv2hi0_se" + [(set (match_operand:SI 0 "register_operand" "=$l, 
r, r") + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "vec_extractv2hi0_be" + [(set (match_operand:HI 0 "register_operand" "=$d,r") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 0)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1" + [(set (match_operand:HI 0 "register_operand" "=$d,r") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_se" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srai45\t%0, 16 + srai\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_ze" + [(set (match_operand:SI 0 "register_operand" "=$d,r") + (zero_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " 0,r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "@ + srli45\t%0, 16 + srli\t%0, %1, 16" + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4")]) + +(define_insn "vec_extractv2hi1_be" + [(set (match_operand:HI 0 "register_operand" "=$l,r,r") + (vec_select:HI + 
(match_operand:V2HI 1 "nonimmediate_operand" " l,r,m") + (parallel [(const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" +{ + switch (which_alternative) + { + case 0: + return "seh33\t%0, %1"; + case 1: + return "seh\t%0, %1"; + case 2: + return nds32_output_32bit_load_s (operands, 2); + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,load") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "mul16" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (mult:V2SI (extend:V2SI (match_operand:V2HI 1 "register_operand" "%r")) + (extend:V2SI (match_operand:V2HI 2 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "mul16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "mulx16" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_merge:V2SI + (vec_duplicate:V2SI + (mult:SI + (extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))))) + (vec_duplicate:V2SI + (mult:SI + (extend:SI + (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))) + (const_int 1)))] + "NDS32_EXT_DSP_P ()" + "mulx16\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "rotrv2hi_1" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_select:V2HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv2hi_1_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_select:V2HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" 
"alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_1" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 0)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 8" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_1_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 1) (const_int 0) (const_int 3)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 8" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_2" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 3) (const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_2_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 16" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_3" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3) (const_int 0) (const_int 1) (const_int 2)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 24" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "rotrv4qi_3_be" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 3) (const_int 2) (const_int 
1)])))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "rotri\t%0, %1, 24" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "v4qi_dup_10" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0) (const_int 1) (const_int 0) (const_int 1)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "pkbb\t%0, %1, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "v4qi_dup_32" + [(set (match_operand:V4QI 0 "register_operand" "=r") + (vec_select:V4QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2) (const_int 3) (const_int 2) (const_int 3)])))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "pktt\t%0, %1, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "vec_unpacks_lo_v4qi" + [(match_operand:V2HI 0 "register_operand" "=r") + (match_operand:V4QI 1 "register_operand" " r")] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" +{ + emit_insn (gen_sunpkd810 (operands[0], operands[1])); + DONE; +}) + +(define_expand "sunpkd810" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd810_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd810_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd810_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + 
(vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd810_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd810\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "sunpkd820" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd820_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd820_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd820_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + 
(const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd820_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "sunpkd830" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd830_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd830_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd830_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + 
(vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd830_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd830\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "sunpkd831" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if 
(TARGET_BIG_ENDIAN) + emit_insn (gen_sunpkd831_imp_be (operands[0], operands[1])); + else + emit_insn (gen_sunpkd831_imp (operands[0], operands[1])); + DONE; +}) + +(define_insn "unpkd831_imp" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 1)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_inv" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 3)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 0)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 2)])))) + (const_int 1)))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "unpkd831_imp_inv_be" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])))) + (vec_duplicate:V2HI + (extend:HI + (vec_select:QI + (match_dup 1) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && 
TARGET_BIG_ENDIAN" + "unpkd831\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_expand "zunpkd810" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd810_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd810_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd820" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd820_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd820_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd830" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd830_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd830_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "zunpkd831" + [(match_operand:V2HI 0 "register_operand") + (match_operand:V4QI 1 "register_operand")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_zunpkd831_imp_be (operands[0], operands[1])); + else + emit_insn (gen_zunpkd831_imp (operands[0], operands[1])); + DONE; +}) + +(define_expand "smbb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "smbt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + 
emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (0))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (1))); + DONE; +}) + +(define_expand "smtt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (0), GEN_INT (0))); + else + emit_insn (gen_mulhisi3v (operands[0], operands[1], operands[2], + GEN_INT (1), GEN_INT (1))); + DONE; +}) + +(define_insn "mulhisi3v" + [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smtt\t%0, %1, %2", + "smbt\t%0, %2, %1", + "smbb\t%0, %1, %2", + "smbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smbb\t%0, %1, %2", + "smbt\t%0, %1, %2", + "smtt\t%0, %1, %2", + "smbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "kmabb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (1), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT 
(0), GEN_INT (0), + operands[1])); + DONE; +}) + +(define_expand "kmabt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (0), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (1), + operands[1])); + DONE; +}) + +(define_expand "kmatt" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (0), GEN_INT (0), + operands[1])); + else + emit_insn (gen_kma_internal (operands[0], operands[2], operands[3], + GEN_INT (1), GEN_INT (1), + operands[1])); + DONE; +}) + +(define_insn "kma_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r, r, r") + (ss_plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) + (match_operand:SI 5 "register_operand" " 0, 0, 0, 0")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmatt\t%0, %1, %2", + "kmabt\t%0, %2, %1", + "kmabb\t%0, %1, %2", + "kmabt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmabb\t%0, %1, %2", + "kmabt\t%0, %1, %2", + "kmatt\t%0, %1, %2", + "kmabt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + 
[(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smds" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smds_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smds_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "smds_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smds_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smdrs" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smdrs_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smdrs_le (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "smdrs_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI 
(vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smdrs_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smxdsv" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smxdsv_be (operands[0], operands[1], operands[2])); + else + emit_insn (gen_smxdsv_le (operands[0], operands[1], operands[2])); + DONE; +}) + + +(define_expand "smxdsv_le" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_expand "smxdsv_be" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI + (sign_extend:SI 
(vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" +{ +}) + +(define_insn "smal1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal2" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" " r") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 
"register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal5" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal6" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)])))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal7" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (sign_extend:DI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smal8" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)])))) + (match_operand:DI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + 
"smal\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; We need this dummy pattern for smal +(define_insn_and_split "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + emit_move_insn (low_part_dst, operands[1]); + emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; We need this dummy pattern for usmar64/usmsr64 +(define_insn_and_split "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nds32_move_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + emit_move_insn (low_part_dst, operands[1]); + emit_move_insn (high_part_dst, const0_rtx); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn_and_split "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))] + "NDS32_EXT_DSP_P ()" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + rtx high_part_dst, low_part_dst; + + low_part_dst = nds32_di_low_part_subreg (operands[0]); + high_part_dst = nds32_di_high_part_subreg (operands[0]); + + + emit_insn (gen_extendhisi2 (low_part_dst, operands[1])); + emit_insn (gen_ashrsi3 (high_part_dst, low_part_dst, GEN_INT (31))); + DONE; +} + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 
"register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "sunpkd820\t%0, %1" + [(set_attr "type" "dpack") + (set_attr "length" "4")]) + +(define_insn "smulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "smmul\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "smmul_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "smmul.u\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "kmmac" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmac\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmmac_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmac.u\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmmsb" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (mult:DI + 
(sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r"))) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmsb\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmmsb_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" " 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI [(mult:DI + (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI (match_operand:SI 3 "register_operand" " r")))] + UNSPEC_ROUND) + (const_int 32)))))] + "NDS32_EXT_DSP_P ()" + "kmmsb.u\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kwmmul" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (ss_mult:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) + (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2))) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "kwmmul\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_insn "kwmmul_round" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (unspec:DI [ + (ss_mult:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r")) (const_int 2)) + (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r")) (const_int 2)))] + UNSPEC_ROUND) + (const_int 32))))] + "NDS32_EXT_DSP_P ()" + "kwmmul.u\t%0, %1, %2" + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "smmwb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1))); + else + emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], 
GEN_INT (0)));
+  DONE;
+})
+
+;; Expand smmwt through smulhisi3_highpart_1.  The V2HI element index
+;; passed as operand 3 is 0 when TARGET_BIG_ENDIAN and 1 otherwise.
+(define_expand "smmwt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (0)));
+  else
+    emit_insn (gen_smulhisi3_highpart_1 (operands[0], operands[1], operands[2], GEN_INT (1)));
+  DONE;
+})
+
+;; Operand 0 receives the DImode product of sign-extended SI operand 1
+;; and the sign-extended HI element of V2HI operand 2 selected by
+;; operand 3, shifted right by 16 and truncated to SImode.
+;; The two alternatives differ only in the operand 3 constraint
+;; (Iv00 / Iv01, presumably the constants 0 and 1 -- confirm in the
+;; constraint definitions); which_alternative picks the mnemonic, and the
+;; choice is mirrored when TARGET_BIG_ENDIAN.
+(define_insn "smulhisi3_highpart_1"
+  [(set (match_operand:SI 0 "register_operand" "= r, r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r"))
+	      (sign_extend:DI
+		(vec_select:HI
+		  (match_operand:V2HI 2 "register_operand" " r, r")
+		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %1, %2",
+			     "smmwb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %1, %2",
+			     "smmwt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type" "dmul")
+   (set_attr "length" "4")])
+
+;; Same product as smulhisi3_highpart_1 with the mult operands in the
+;; other order: here operand 1 is the V2HI register and operand 2 the SI
+;; register.  The assembly operands are printed as %2, %1 so that the SI
+;; register stays in the first source position, as it is in
+;; smulhisi3_highpart_1 (where the smmwb/smmwt expanders put it).
+(define_insn "smulhisi3_highpart_2"
+  [(set (match_operand:SI 0 "register_operand" "= r, r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI
+	      (sign_extend:DI
+		(vec_select:HI
+		  (match_operand:V2HI 1 "register_operand" " r, r")
+		  (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")])))
+	      (sign_extend:DI (match_operand:SI 2 "register_operand" " r, r")))
+	    (const_int 16))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "smmwt\t%0, %2, %1",
+			     "smmwb\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "smmwb\t%0, %2, %1",
+			     "smmwt\t%0, %2, %1" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type" "dmul")
+   (set_attr "length" "4")])
+
+(define_expand "smmwb_round"
+  [(match_operand:SI 0 "register_operand" "")
+
(match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); + else + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); + DONE; +}) + +(define_expand "smmwt_round" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (0))); + else + emit_insn (gen_smmw_round_internal (operands[0], operands[1], operands[2], GEN_INT (1))); + DONE; +}) + +(define_insn "smmw_round_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (truncate:SI + (lshiftrt:DI + (unspec:DI + [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] + UNSPEC_ROUND) + (const_int 16))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smmwt.u\t%0, %1, %2", + "smmwb.u\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smmwb.u\t%0, %1, %2", + "smmwt.u\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmul") + (set_attr "length" "4")]) + +(define_expand "kmmawb" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + else + emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), 
operands[1]));
+  DONE;
+})
+
+;; Expand kmmawt through kmmaw_internal.  Operand 1 (the value being
+;; added to) is passed as the last argument; the V2HI element index is 0
+;; when TARGET_BIG_ENDIAN and 1 otherwise.
+(define_expand "kmmawt"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  else
+    emit_insn (gen_kmmaw_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  DONE;
+})
+
+;; ss_plus of operand 4 (tied to operand 0 by the "0" constraint) and the
+;; product of sign-extended SI operand 1 with the sign-extended HI element
+;; of V2HI operand 2 selected by operand 3, shifted right by 16 and
+;; truncated to SImode.  which_alternative picks the mnemonic, and the
+;; choice is mirrored when TARGET_BIG_ENDIAN.
+(define_insn "kmmaw_internal"
+  [(set (match_operand:SI 0 "register_operand" "= r, r")
+	(ss_plus:SI
+	  (match_operand:SI 4 "register_operand" " 0, 0")
+	  (truncate:SI
+	    (lshiftrt:DI
+	      (mult:DI
+		(sign_extend:DI (match_operand:SI 1 "register_operand" " r, r"))
+		(sign_extend:DI
+		  (vec_select:HI
+		    (match_operand:V2HI 2 "register_operand" " r, r")
+		    (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))
+	      (const_int 16)))))]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    {
+      const char *pats[] = { "kmmawt\t%0, %1, %2",
+			     "kmmawb\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+  else
+    {
+      const char *pats[] = { "kmmawb\t%0, %1, %2",
+			     "kmmawt\t%0, %1, %2" };
+      return pats[which_alternative];
+    }
+}
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Expand kmmawb_round through kmmaw_round_internal.  The V2HI element
+;; index is 1 when TARGET_BIG_ENDIAN and 0 otherwise.  The "type"
+;; attribute is "dmac", the same value its counterpart kmmawt_round and
+;; kmmaw_round_internal carry.
+(define_expand "kmmawb_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI 2 "register_operand" "")
+   (match_operand:V2HI 3 "register_operand" "")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1]));
+  else
+    emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1]));
+  DONE;
+}
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+(define_expand "kmmawt_round"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:SI 1 "register_operand" "")
+   (match_operand:SI
2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (0), operands[1])); + else + emit_insn (gen_kmmaw_round_internal (operands[0], operands[2], operands[3], GEN_INT (1), operands[1])); + DONE; +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + + +(define_insn "kmmaw_round_internal" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (ss_plus:SI + (match_operand:SI 4 "register_operand" " 0, 0") + (truncate:SI + (lshiftrt:DI + (unspec:DI + [(mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" " r, r")) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iv00, Iv01")]))))] + UNSPEC_ROUND) + (const_int 16)))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "kmmawt.u\t%0, %1, %2", + "kmmawb.u\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "kmmawb.u\t%0, %1, %2", + "kmmawt.u\t%0, %1, %2" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smalbb" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (1))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (0))); + DONE; +}) + +(define_expand "smalbt" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + 
if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (0))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (1))); + DONE; +}) + +(define_expand "smaltt" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:V2HI 3 "register_operand" "")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (0), GEN_INT (0))); + else + emit_insn (gen_smaddhidi (operands[0], operands[2], + operands[3], operands[1], + GEN_INT (1), GEN_INT (1))); + DONE; +}) + +(define_insn "smaddhidi" + [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") + (plus:DI + (match_operand:DI 3 "register_operand" " 0, 0, 0, 0") + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")]))))))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1", + "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2", + "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smaddhidi2" + [(set (match_operand:DI 0 "register_operand" "= r, r, r, r") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 4 
"nds32_imm_0_1_operand" " Iv00, Iv00, Iv01, Iv01")]))) + (sign_extend:DI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r, r, r, r") + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iv00, Iv01, Iv01, Iv00")])))) + (match_operand:DI 3 "register_operand" " 0, 0, 0, 0")))] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + { + const char *pats[] = { "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1", + "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2" }; + return pats[which_alternative]; + } + else + { + const char *pats[] = { "smalbb\t%0, %1, %2", + "smalbt\t%0, %1, %2", + "smaltt\t%0, %1, %2", + "smalbt\t%0, %2, %1" }; + return pats[which_alternative]; + } +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smalda1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalda1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalda1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_expand "smalds1" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smalds1_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smalds1_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smalda1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 
"register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smalds1_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smalds\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smalda1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smalds1_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + 
(match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN" + "smalds\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_expand "smaldrs3" + [(match_operand:DI 0 "register_operand" "") + (match_operand:DI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" " r") + (match_operand:V2HI 3 "register_operand" " r")] + "NDS32_EXT_DSP_P ()" +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_smaldrs3_be (operands[0], operands[1], operands[2], operands[3])); + else + emit_insn (gen_smaldrs3_le (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +(define_insn "smaldrs3_le" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)]))))))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "smaldrs\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "smaldrs3_be" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (sign_extend:DI + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + 
(match_operand:V2HI 3 "register_operand" " r")
+			      (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smaldrs\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Dispatch to the _be or _le insn pattern according to TARGET_BIG_ENDIAN.
+(define_expand "smalxda1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalxda1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalxda1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+;; Dispatch to the _be or _le insn pattern according to TARGET_BIG_ENDIAN.
+(define_expand "smalxds1"
+  [(match_operand:DI 0 "register_operand" "")
+   (match_operand:DI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" " r")
+   (match_operand:V2HI 3 "register_operand" " r")]
+  "NDS32_EXT_DSP_P ()"
+{
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_smalxds1_be (operands[0], operands[1], operands[2], operands[3]));
+  else
+    emit_insn (gen_smalxds1_le (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+})
+
+;; Little-endian form, instantiated once per plus_minus code.
+;; Operand 1 (tied to operand 0) plus the sign-extended SImode result of
+;; (op2[1] * op3[0]) plus_minus (op2[0] * op3[1]).
+;; The <add_sub> attribute in the name and mnemonic must yield "a" for
+;; plus and "s" for minus so that the generated names are the
+;; smalxda1_le / smalxds1_le that the smalxda1 / smalxds1 expanders call
+;; (attribute assumed to be defined outside this chunk -- confirm).
+(define_insn "smalxd<add_sub>1_le"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand" " 0")
+	  (sign_extend:DI
+	    (plus_minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 1)]))))))))]
+  "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN"
+  "smalxd<add_sub>\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+
+;; Big-endian form of the pattern above: the element indices are
+;; mirrored, (op2[0] * op3[1]) plus_minus (op2[1] * op3[0]).  Named with
+;; <add_sub> for the same reason, giving smalxda1_be / smalxds1_be.
+(define_insn "smalxd<add_sub>1_be"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI
+	  (match_operand:DI 1 "register_operand" " 0")
+	  (sign_extend:DI
+	    (plus_minus:SI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 0)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 2)
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_dup 3)
+				  (parallel [(const_int 0)]))))))))]
+  "NDS32_EXT_DSP_P () && TARGET_BIG_ENDIAN"
+  "smalxd<add_sub>\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Operand 1 (tied to operand 0) minus the sign-extended product
+;; op2[1] * op3[1], minus the sign-extended product op2[0] * op3[0].
+(define_insn "smslda1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI
+	  (minus:DI
+	    (match_operand:DI 1 "register_operand" " 0")
+	    (sign_extend:DI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 1)]))))))
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "smslda\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+(define_insn "smslxda1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI
+	  (minus:DI
+	    (match_operand:DI 1 "register_operand" " 0")
+	    (sign_extend:DI
+	      (mult:SI
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 2 "register_operand" " r")
+				  (parallel [(const_int 1)])))
+		(sign_extend:SI (vec_select:HI
+				  (match_operand:V2HI 3 "register_operand" " r")
+				  (parallel [(const_int 0)]))))))
+	  (sign_extend:DI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup
3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "smslxda\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; mada for synthetize smalda +(define_insn_and_split "mada1" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], + operands[3], operands[4])); + emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], + operands[5], operands[6])); + emit_insn (gen_addsi3 (operands[0], result0, result1)); + DONE; +}) + +(define_insn_and_split "mada2" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () && !reload_completed" + "#" 
+ "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 1)] +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, operands[1], operands[2], + operands[3], operands[4])); + emit_insn (gen_mulhisi3v (result1, operands[1], operands[2], + operands[6], operands[5])); + emit_insn (gen_addsi3 (operands[0], result0, result1)); + DONE; +}) + +;; sms for synthetize smalds +(define_insn_and_split "sms1" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () + && (!reload_completed + || !nds32_need_split_sms_p (operands[3], operands[4], + operands[5], operands[6]))" + +{ + return nds32_output_sms (operands[3], operands[4], + operands[5], operands[6]); +} + "NDS32_EXT_DSP_P () + && !reload_completed + && nds32_need_split_sms_p (operands[3], operands[4], + operands[5], operands[6])" + [(const_int 1)] +{ + nds32_split_sms (operands[0], operands[1], operands[2], + operands[3], operands[4], + operands[5], operands[6]); + DONE; +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn_and_split "sms2" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(match_operand:SI 3 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + 
(parallel [(match_operand:SI 4 "nds32_imm_0_1_operand" " Iu01")])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(match_operand:SI 5 "nds32_imm_0_1_operand" " Iu01")]))) + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(match_operand:SI 6 "nds32_imm_0_1_operand" " Iu01")]))))))] + "NDS32_EXT_DSP_P () + && (!reload_completed + || !nds32_need_split_sms_p (operands[3], operands[4], + operands[6], operands[5]))" +{ + return nds32_output_sms (operands[3], operands[4], + operands[6], operands[5]); +} + "NDS32_EXT_DSP_P () + && !reload_completed + && nds32_need_split_sms_p (operands[3], operands[4], + operands[6], operands[5])" + [(const_int 1)] +{ + nds32_split_sms (operands[0], operands[1], operands[2], + operands[3], operands[4], + operands[6], operands[5]); + DONE; +} + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))))))] + "NDS32_EXT_DSP_P ()" + "kmda\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 1) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))))))] + "NDS32_EXT_DSP_P ()" 
+ "kmxda\t%0, %1, %2" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmada" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 1)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 0)])))))))] + "NDS32_EXT_DSP_P ()" + "kmada\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmada2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P ()" + "kmada\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmaxda" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI + (match_operand:SI 1 "register_operand" " 0") + (ss_plus:SI + (mult:SI + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)]))) + (sign_extend:SI (vec_select:HI + (match_operand:V2HI 3 "register_operand" " r") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI (vec_select:HI + (match_dup 2) + (parallel [(const_int 0)]))) + (sign_extend:SI (vec_select:HI + (match_dup 3) + (parallel [(const_int 1)])))))))] + "NDS32_EXT_DSP_P 
()"
+  "kmaxda\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Operand 1 (tied to operand 0) ss_plus
+;; ((op2[1] * op3[1]) ss_minus (op2[0] * op3[0])).
+(define_insn "kmads"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand" " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmads\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Operand 1 (tied to operand 0) ss_plus
+;; ((op2[0] * op3[0]) ss_minus (op2[1] * op3[1])).
+(define_insn "kmadrs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand" " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmadrs\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Operand 1 (tied to operand 0) ss_plus
+;; ((op2[1] * op3[0]) ss_minus (op2[0] * op3[1])).
+(define_insn "kmaxds"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_plus:SI
+	  (match_operand:SI 1 "register_operand" " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmaxds\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Operand 1 (tied to operand 0) ss_minus
+;; ((op2[1] * op3[1]) ss_minus (op2[0] * op3[0])), as written in the RTL.
+(define_insn "kmsda"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_minus:SI
+	  (match_operand:SI 1 "register_operand" " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 1)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 0)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsda\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; Operand 1 (tied to operand 0) ss_minus
+;; ((op2[1] * op3[0]) ss_minus (op2[0] * op3[1])), as written in the RTL.
+(define_insn "kmsxda"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(ss_minus:SI
+	  (match_operand:SI 1 "register_operand" " 0")
+	  (ss_minus:SI
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 2 "register_operand" " r")
+				(parallel [(const_int 1)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_operand:V2HI 3 "register_operand" " r")
+				(parallel [(const_int 0)]))))
+	    (mult:SI
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 2)
+				(parallel [(const_int 0)])))
+	      (sign_extend:SI (vec_select:HI
+				(match_dup 3)
+				(parallel [(const_int 1)])))))))]
+  "NDS32_EXT_DSP_P ()"
+  "kmsxda\t%0, %2, %3"
+  [(set_attr "type" "dmac")
+   (set_attr "length" "4")])
+
+;; smax[8|16] and umax[8|16]
+;; The pattern name and the mnemonic are built from iterator attributes
+;; so that every sumax code / VQIHI mode combination gets its own name
+;; and opcode.  <opcode> and <bits> are assumed to be defined with the
+;; target's other iterators (outside this chunk) -- confirm.
+(define_insn "<opcode><mode>3"
+  [(set (match_operand:VQIHI 0 "register_operand" "=r")
+	(sumax:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+		     (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P ()"
+  "<opcode><bits>\t%0, %1, %2"
+  [(set_attr "type" "dalu")
+   (set_attr "length" "4")])
+
+;; smin[8|16] and umin[8|16]
+(define_insn "3"
+  [(set (match_operand:VQIHI 0 "register_operand" "=r")
+	(sumin:VQIHI (match_operand:VQIHI 1 "register_operand" " r")
+		     (match_operand:VQIHI 2 "register_operand" " r")))]
+  "NDS32_EXT_DSP_P
()" + "\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "3_bb" + [(set (match_operand: 0 "register_operand" "=r") + (sumin_max: (vec_select: + (match_operand:VQIHI 1 "register_operand" " r") + (parallel [(const_int 0)])) + (vec_select: + (match_operand:VQIHI 2 "register_operand" " r") + (parallel [(const_int 0)]))))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "\t%0, %1, %2" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "3_tt" + [(set (match_operand: 0 "register_operand" "=r") + (sumin_max: (vec_select: + (match_operand:VQIHI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select: + (match_operand:VQIHI 2 "register_operand" " r") + (parallel [(const_int 1)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotr_1 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (mode, tmp, mode, 0)); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "v4qi3_22" + [(set (match_operand:QI 0 "register_operand" "=r") + (sumin_max:QI (vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 2)])) + (vec_select:QI + (match_operand:V4QI 2 "register_operand" " r") + (parallel [(const_int 2)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_v4qi3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotrv4qi_2 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "v4qi3_33" + [(set (match_operand:QI 0 "register_operand" "=r") + (sumin_max:QI 
(vec_select:QI + (match_operand:V4QI 1 "register_operand" " r") + (parallel [(const_int 3)])) + (vec_select:QI + (match_operand:V4QI 2 "register_operand" " r") + (parallel [(const_int 3)]))))] + "NDS32_EXT_DSP_P () && !reload_completed && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P () && !reload_completed" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (V4QImode); + emit_insn (gen_v4qi3 (tmp, operands[1], operands[2])); + emit_insn (gen_rotrv4qi_3 (tmp, tmp)); + emit_move_insn (operands[0], simplify_gen_subreg (QImode, tmp, V4QImode, 0)); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn_and_split "v2hi3_bbtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_merge:V2HI + (vec_duplicate:V2HI + (sumin_max:HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" " r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" " r") + (parallel [(const_int 1)])))) + (vec_duplicate:V2HI + (sumin_max:HI (vec_select:HI + (match_dup:V2HI 1) + (parallel [(const_int 0)])) + (vec_select:HI + (match_dup:V2HI 2) + (parallel [(const_int 0)])))) + (const_int 2)))] + "NDS32_EXT_DSP_P () && !TARGET_BIG_ENDIAN" + "#" + "NDS32_EXT_DSP_P ()" + [(const_int 0)] +{ + emit_insn (gen_v2hi3 (operands[0], operands[1], operands[2])); + DONE; +} + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_expand "abs2" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P () && TARGET_HW_ABS && !flag_wrapv" +{ +}) + +(define_insn "kabs2" + [(set (match_operand:VQIHI 0 "register_operand" "=r") + (ss_abs:VQIHI (match_operand:VQIHI 1 "register_operand" " r")))] + "NDS32_EXT_DSP_P ()" + "kabs\t%0, %1" + [(set_attr "type" "dalu") + (set_attr "length" "4")]) + +(define_insn "mar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (extend:DI + 
(match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "mar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "mar64_3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (match_operand:DI 1 "register_operand" " 0") + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "mar64_4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "mar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "msr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (extend:DI + (match_operand:SI 2 "register_operand" " r")) + (extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "msr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "msr64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 1 "register_operand" " 0") + (extend:DI + (mult:SI + (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "msr64\t%0, %2, %3" + 
[(set_attr "type" "dmac") + (set_attr "length" "4")]) + +;; kmar64, kmsr64, ukmar64 and ukmsr64 +(define_insn "kmar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "kmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_plus:DI + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "kmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "kmsr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (ss_minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (sign_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "kmsr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "ukmar64_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_plus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "ukmar64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "ukmar64_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_plus:DI + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r"))) + (match_operand:DI 1 "register_operand" " 0")))] + "NDS32_EXT_DSP_P ()" + "ukmar64\t%0, %2, %3" + 
[(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "ukmsr64" + [(set (match_operand:DI 0 "register_operand" "=r") + (us_minus:DI + (match_operand:DI 1 "register_operand" " 0") + (mult:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" " r")) + (zero_extend:DI + (match_operand:SI 3 "register_operand" " r")))))] + "NDS32_EXT_DSP_P ()" + "ukmsr64\t%0, %2, %3" + [(set_attr "type" "dmac") + (set_attr "length" "4")]) + +(define_insn "bpick1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 3 "register_operand" " r")) + (and:SI + (match_operand:SI 2 "register_operand" " r") + (not:SI (match_dup 3)))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %1, %2, %3" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (not:SI (match_dup 2)) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %1, %3, %2" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (match_operand:SI 3 "register_operand" " r") + (not:SI (match_dup 1)))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %2, %3, %1" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + +(define_insn "bpick4" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI + (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")) + (and:SI + (not:SI (match_dup 1)) + (match_operand:SI 3 "register_operand" " r"))))] + "NDS32_EXT_DSP_P ()" + "bpick\t%0, %2, %3, %1" + [(set_attr "type" "dbpick") + (set_attr "length" "4")]) + 
+(define_insn "bpick5"  ; result = (op1 & ~op2) | (op3 & op2); operand 2 is the select mask.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI
+          (and:SI
+            (match_operand:SI 1 "register_operand" " r")
+            (not:SI (match_operand:SI 2 "register_operand" " r")))
+          (and:SI
+            (match_operand:SI 3 "register_operand" " r")
+            (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2"
+  [(set_attr "type" "dbpick")
+   (set_attr "length" "4")])
+;; bpick6: result = (~op1 & op2) | (op3 & op1); operand 1 is the select mask.
+(define_insn "bpick6"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI
+          (and:SI
+            (not:SI (match_operand:SI 1 "register_operand" " r"))
+            (match_operand:SI 2 "register_operand" " r"))
+          (and:SI
+            (match_operand:SI 3 "register_operand" " r")
+            (match_dup 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1"
+  [(set_attr "type" "dbpick")
+   (set_attr "length" "4")])
+;; bpick7: result = (op1 & ~op2) | (op2 & op3); the second AND has the mask as its first operand.
+(define_insn "bpick7"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI
+          (and:SI
+            (match_operand:SI 1 "register_operand" " r")
+            (not:SI (match_operand:SI 2 "register_operand" " r")))
+          (and:SI
+            (match_dup 2)
+            (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %1, %2"
+  [(set_attr "type" "dbpick")
+   (set_attr "length" "4")])
+;; bpick8: result = (~op1 & op2) | (op1 & op3); the second AND has the mask as its first operand.
+(define_insn "bpick8"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI
+          (and:SI
+            (not:SI (match_operand:SI 1 "register_operand" " r"))
+            (match_operand:SI 2 "register_operand" " r"))
+          (and:SI
+            (match_dup 1)
+            (match_operand:SI 3 "register_operand" " r"))))]
+  "NDS32_EXT_DSP_P ()"
+  "bpick\t%0, %3, %2, %1"
+  [(set_attr "type" "dbpick")
+   (set_attr "length" "4")])
+;; sraiu: arithmetic right shift inside UNSPEC_ROUND; srai.u for the Iu05 alternative, sra.u for the register one.
+(define_insn "sraiu"
+  [(set (match_operand:SI 0 "register_operand" "= r, r")
+        (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r, r")
+                                 (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r"))]
+                   UNSPEC_ROUND))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   srai.u\t%0, %1, %2
+   sra.u\t%0, %1, %2"
+  [(set_attr "type" "daluround")
+   (set_attr "length" "4")])
+;; kssl: saturating left shift (ss_ashift); kslli for the Iu05 alternative, ksll for the register one.
+(define_insn "kssl"
+  [(set (match_operand:SI 0 "register_operand" "= r, r")
+        (ss_ashift:SI (match_operand:SI 1 "register_operand" " r, r")
+                      (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   kslli\t%0, %1, %2
+   ksll\t%0, %1, %2"
+  [(set_attr "type" "dalu")
+   (set_attr "length" "4")])
+;; kslraw_round: op2 < 0 selects an UNSPEC_ROUND arithmetic right shift by -op2, otherwise a saturating left shift by op2.
+(define_insn "kslraw_round"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (if_then_else:SI
+          (lt:SI (match_operand:SI 2 "register_operand" " r")
+                 (const_int 0))
+          (unspec:SI [(ashiftrt:SI (match_operand:SI 1 "register_operand" " r")
+                                   (neg:SI (match_dup 2)))]
+                     UNSPEC_ROUND)
+          (ss_ashift:SI (match_dup 1)
+                        (match_dup 2))))]
+  "NDS32_EXT_DSP_P ()"
+  "kslraw.u\t%0, %1, %2"
+  [(set_attr "type" "daluround")
+   (set_attr "length" "4")])
+;; DImode shift_rotate by a nds32_rimm6u_operand; emitted as "#" and split before reload, through a fresh pseudo when op0 and op1 share a REGNO.
+(define_insn_and_split "di3"
+  [(set (match_operand:DI 0 "register_operand" "")
+        (shift_rotate:DI (match_operand:DI 1 "register_operand" "")
+                         (match_operand:SI 2 "nds32_rimm6u_operand" "")))]
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  "#"
+  "NDS32_EXT_DSP_P () && !reload_completed"
+  [(const_int 0)]
+{
+  if (REGNO (operands[0]) == REGNO (operands[1]))
+    {
+      rtx tmp = gen_reg_rtx (DImode);
+      nds32_split_di3 (tmp, operands[1], operands[2]);
+      emit_move_insn (operands[0], tmp);
+    }
+  else
+    nds32_split_di3 (operands[0], operands[1], operands[2]);
+  DONE;
+})
+;; sclip32: UNSPEC_CLIPS_OV of a register and an immediate (constraint "i").
+(define_insn "sclip32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIPS_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "sclip32\t%0, %1, %2"
+  [(set_attr "type" "dclip")
+   (set_attr "length" "4")]
+)
+;; uclip32: UNSPEC_CLIP_OV of a register and an immediate (constraint "i").
+(define_insn "uclip32"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                    (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_CLIP_OV))]
+  "NDS32_EXT_DSP_P ()"
+  "uclip32\t%0, %1, %2"
+  [(set_attr "type" "dclip")
+   (set_attr "length" "4")]
+)
+;; bitrev/bitrevi: UNSPEC_BITREV.  NOTE(review): the condition is "" here, not "NDS32_EXT_DSP_P ()" as in the neighbouring patterns -- confirm this is intended.
+(define_insn "bitrev"
+  [(set (match_operand:SI 0 "register_operand" "=r, r")
+        (unspec:SI [(match_operand:SI 1 "register_operand" " r, r")
+                    (match_operand:SI 2 "nds32_rimm5u_operand" " r, Iu05")]
+                   UNSPEC_BITREV))]
+  ""
+  "@
+   bitrev\t%0, %1, %2
+   bitrevi\t%0, %1, %2"
+  [(set_attr "type" "dalu")
+   (set_attr "length" "4")]
+)
+
+;; wext (register amount) and wexti (Iu05 amount): SImode truncation of a shifted DImode operand.
+(define_insn "wext"
+  [(set (match_operand:SI 0 "register_operand" "=r, r")
+        (truncate:SI
+          (shiftrt:DI
+            (match_operand:DI 1 "register_operand" " r, r")
+            (match_operand:SI 2 "nds32_rimm5u_operand" " r,Iu05"))))]
+  "NDS32_EXT_DSP_P ()"
+  "@
+   wext\t%0, %1, %2
+   wexti\t%0, %1, %2"
+  [(set_attr "type" "dwext")
+   (set_attr "length" "4")])
+
+;; 32-bit halving add/sub (raddw and rsubw): low word of (sign_extend (op1) +/- sign_extend (op2)) >> 1, arithmetic shift.
+(define_insn "rsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (truncate:SI
+          (ashiftrt:DI
+            (plus_minus:DI
+              (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+              (sign_extend:DI (match_operand:SI 2 "register_operand" " r")))
+            (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "rw\t%0, %1, %2"
+  [(set_attr "type" "dalu")
+   (set_attr "length" "4")])
+
+;; 32-bit halving add/sub (uraddw and ursubw): low word of (zero_extend (op1) +/- zero_extend (op2)) >> 1, logical shift.
+(define_insn "ursi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (truncate:SI
+          (lshiftrt:DI
+            (plus_minus:DI
+              (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+              (zero_extend:DI (match_operand:SI 2 "register_operand" " r")))
+            (const_int 1))))]
+  "NDS32_EXT_DSP_P ()"
+  "urw\t%0, %1, %2"
+  [(set_attr "type" "dalu")
+   (set_attr "length" "4")])
diff --git a/gcc/config/nds32/nds32-elf.opt b/gcc/config/nds32/nds32-elf.opt
new file mode 100644
index 0000000..afe6aad
--- /dev/null
+++ b/gcc/config/nds32/nds32-elf.opt
@@ -0,0 +1,16 @@
+mcmodel=
+Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_MEDIUM)
+Specify the address generation strategy for code model.
+
+Enum
+Name(nds32_cmodel_type) Type(enum nds32_cmodel_type)
+Known cmodel types (for use with the -mcmodel= option):
+
+EnumValue
+Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL)
+
+EnumValue
+Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM)
+
+EnumValue
+Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE)
diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c
index 95c9586..26d2865 100644
--- a/gcc/config/nds32/nds32-fp-as-gp.c
+++ b/gcc/config/nds32/nds32-fp-as-gp.c
@@ -26,19 +26,256 @@
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "hard-reg-set.h"
+#include "tm_p.h"
+#include "rtl.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "insn-config.h"
+#include "regs.h"
+#include "hard-reg-set.h" /* NOTE(review): already included a few lines above.  */
+#include "ira.h"
+#include "ira-int.h"
+#include "df.h"
+#include "tree-core.h"
+#include "tree-pass.h"
+#include "nds32-protos.h"
 
 /* ------------------------------------------------------------------------ */
 
+/* A helper function to check if this function should contain a prologue.
+   Return true when any of registers 0..27 is required to be callee-saved,
+   when flag_pic is set, or when FP_REGNUM or LP_REGNUM must be saved.  */
+static bool
+nds32_have_prologue_p (void)
+{
+  int i;
+
+  for (i = 0; i < 28; i++)
+    if (NDS32_REQUIRED_CALLEE_SAVED_P (i))
+      return true;
+
+  return (flag_pic
+          || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
+          || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM));
+}
+/* Walk every insn of every basic block in cfun and return how many
+   single-set insns satisfy nds32_symbol_load_store_p and have a bare
+   MEM as the operand picked below (SET_DEST when SET_SRC is a REG,
+   SET_SRC otherwise).  */
+static int
+nds32_get_symbol_count (void)
+{
+  int symbol_count = 0;
+  rtx_insn *insn;
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      FOR_BB_INSNS (bb, insn)
+        {
+          /* Count the insns whose addressing mode is a symbol
+             (as decided by nds32_symbol_load_store_p).  */
+          if (single_set (insn) && nds32_symbol_load_store_p (insn))
+            {
+              rtx pattern = PATTERN (insn);
+              rtx mem;
+              gcc_assert (GET_CODE (pattern) == SET);
+              if (GET_CODE (SET_SRC (pattern)) == REG )
+                mem = SET_DEST (pattern);
+              else
+                mem = SET_SRC (pattern);
+
+              /* We have only lwi37 and swi37 for the fp-as-gp optimization,
+                 so do not count anything other than SImode accesses.
+                 A QImode or HImode MEM that is loaded is wrapped in
+                 ZERO_EXTEND or SIGN_EXTEND, so it is not a bare MEM here.
+                 NOTE(review): a narrow store has a bare MEM as SET_DEST and
+                 would pass this test unless nds32_symbol_load_store_p
+                 already rejects it -- confirm.  */
+              if (GET_CODE (mem) == MEM)
+                symbol_count++;
+            }
+        }
+    }
+
+  return symbol_count;
+}
+
 /* Function to determine whether it is worth to do fp_as_gp optimization.
-   Return 0: It is NOT worth to do fp_as_gp optimization.
-   Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization.
+   Return false: It is NOT worth to do fp_as_gp optimization.
+   Return true: It is APPROXIMATELY worth to do fp_as_gp optimization.
    Note that if it is worth to do fp_as_gp optimization,
    we MUST set FP_REGNUM ever live in this function.  */
-int
+static bool
 nds32_fp_as_gp_check_available (void)
 {
-  /* By default we return 0.  */
-  return 0;
+  basic_block bb;
+  basic_block exit_bb;
+  edge_iterator ei;
+  edge e;
+  bool first_exit_blocks_p;
+
+  /* If ANY of the following conditions holds,
+     we DO NOT perform the fp_as_gp optimization:
+       1. TARGET_FORBID_FP_AS_GP is set,
+          regardless of TARGET_FORCE_FP_AS_GP.
+       2. The user explicitly uses the 'naked'/'no_prologue' attribute.
+          We use nds32_naked_function_p() for this check.
+       3. We are not optimizing for size.
+       4. A frame pointer is needed.
+       5. $fp is already required to be saved,
+          which means $fp was already chosen by the register allocator.
+          Thus we had better not use it for the fp_as_gp optimization.
+       6. This function is a vararg function.
+          DO NOT apply the fp_as_gp optimization to such a function
+          because it may change and break the stack frame.
+       7. The epilogue is empty (the exit block has no fallthru
+          predecessor edge).
+          This happens when the function uses exit()
+          or its attribute is no_return.
+          In that case the compiler will not expand an epilogue,
+          so we have no chance to output the .omit_fp_end directive.  */
+  if (TARGET_FORBID_FP_AS_GP
+      || nds32_naked_function_p (current_function_decl)
+      || !optimize_size
+      || frame_pointer_needed
+      || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
+      || (cfun->stdarg == 1)
+      || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL))
+    return false;
+
+  /* Disable fp_as_gp if there is any infinite loop, since $fp may be
+     reused inside an infinite loop by register renaming.
+     To detect infinite loops we rely on this property: when there is
+     no infinite loop, the single exit block post-dominates every other
+     basic block.  */
+  first_exit_blocks_p = true;
+  exit_bb = NULL;
+
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+    {
+      /* With more than one exit block we do not perform fp_as_gp either.  */
+      if (!first_exit_blocks_p)
+        return false;
+
+      exit_bb = e->src;
+      first_exit_blocks_p = false;
+    }
+
+  /* No exit_bb found?  Then give up on fp_as_gp.  */
+  if (!exit_bb)
+    return false;
+
+  /* Every bb must be post-dominated by exit_bb if there is no infinite
+     loop.  This reads CDI_POST_DOMINATORS, so that dominance info must
+     have been computed before this function is called.  */
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      if (!dominated_by_p (CDI_POST_DOMINATORS,
+                           bb,
+                           exit_bb))
+        return false;
+    }
+
+  /* Now we can check the possibility of using fp_as_gp optimization.  */
+  if (TARGET_FORCE_FP_AS_GP)
+    {
+      /* The user explicitly passed the -mforce-fp-as-gp option.  */
+      return true;
+    }
+  else
+    {
+      /* In the following we evaluate whether
+         it is worth doing the fp_as_gp optimization.  */
+      bool good_gain = false;
+      int symbol_count;
+
+      int threshold;
+
+      /* Check whether a prologue is already required.
+         Note that $gp will be saved in the prologue for PIC code generation.
+         The threshold is then chosen by the existence of a prologue.
+         Each fp-implied instruction saves 2 bytes of code size
+         over the gp-aware instruction, which gives the heuristics below.
+         flag_pic is also tested inside nds32_have_prologue_p, so the
+         explicit test here is redundant but harmless.  */
+      if (flag_pic
+          || nds32_have_prologue_p ())
+        {
+          /* Have-prologue:
+               The compiler already intends to generate prologue content,
+               so the fp_as_gp optimization only inserts the
+               'la $fp,_FP_BASE_' instruction, which will be
+               converted into a 4-byte instruction at link time.
+               The threshold is "3" symbol accesses, 2 + 2 + 2 > 4.  */
+          threshold = 3;
+        }
+      else
+        {
+          /* None-prologue:
+               The compiler originally does not generate prologue content,
+               so the fp_as_gp optimization NOT ONLY inserts the
+               'la $fp,_FP_BASE_' instruction, but also causes
+               push/pop instructions.
+               If we are using v3push (push25/pop25),
+               the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2;
+               if we are using normal push (smw/lmw),
+               the threshold is "5+2" symbol accesses, 7*2 > 4 + 4 + 4.  */
+          threshold = 5 + (TARGET_V3PUSH ? 0 : 2);
+        }
+
+      symbol_count = nds32_get_symbol_count ();
+
+      if (symbol_count >= threshold)
+        good_gain = true;
+
+      /* Enable fp_as_gp optimization when potential gain is good enough.  */
+      return good_gain;
+    }
+}
+/* Body of the fp_as_gp pass.  Compute post-dominators, ask
+   nds32_fp_as_gp_check_available whether to use fp_as_gp for cfun,
+   set or clear the FP_REGNUM bit in IRA's x_no_unit_alloc_regs to match,
+   record the decision in cfun->machine->fp_as_gp_p, and free the
+   post-dominator info again.  */
+static unsigned int
+nds32_fp_as_gp (void)
+{
+  bool fp_as_gp_p;
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  fp_as_gp_p = nds32_fp_as_gp_check_available ();
+
+  /* This is a hack on IRA to enable/disable a hard register per function:
+     when fp_as_gp is used, FP_REGNUM is added to x_no_unit_alloc_regs and
+     marked as ever live; otherwise its bit is cleared.
+     We *MUST* review this approach after the gcc 4.9 migration!  */
+  if (fp_as_gp_p) {
+    SET_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
+    df_set_regs_ever_live (FP_REGNUM, 1);
+  } else {
+    CLEAR_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM);
+  }
+
+  cfun->machine->fp_as_gp_p = fp_as_gp_p;
+
+  free_dominance_info (CDI_POST_DOMINATORS);
+  return 1; /* NOTE(review): this becomes the result of execute (); GCC passes normally return a TODO_* flag mask there, so confirm that 1 rather than 0 is intended.  */
+}
+/* Descriptor for the pass: an RTL pass named "fp_as_gp", timed under
+   TV_MACH_DEP, with no property or TODO flags.  */
+const pass_data pass_data_nds32_fp_as_gp =
+{
+  RTL_PASS, /* type */
+  "fp_as_gp", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_MACH_DEP, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0 /* todo_flags_finish */
+};
+/* The pass object: an rtl_opt_pass built from pass_data_nds32_fp_as_gp
+   whose execute method forwards to nds32_fp_as_gp.  */
+class pass_nds32_fp_as_gp : public rtl_opt_pass
+{
+public:
+  pass_nds32_fp_as_gp (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt)
+  {}
+
+  /* opt_pass methods: the gate lets the pass run only when
+     TARGET_LINUX_ABI is off, TARGET_16_BIT is on, and we are
+     optimizing for size.  */
+  bool gate (function *)
+  {
+    return !TARGET_LINUX_ABI
+           && TARGET_16_BIT
+           && optimize_size;
+  }
+  unsigned int execute (function *) { return nds32_fp_as_gp (); }
+};
+/* Return a newly allocated pass_nds32_fp_as_gp for context CTXT.  */
+rtl_opt_pass *
+make_pass_nds32_fp_as_gp (gcc::context *ctxt)
+{
+  return new pass_nds32_fp_as_gp (ctxt);
 }
 
 /* ------------------------------------------------------------------------ */
diff --git a/gcc/config/nds32/nds32-fpu.md b/gcc/config/nds32/nds32-fpu.md
index 719b042..9b84402 100644
--- a/gcc/config/nds32/nds32-fpu.md
+++ b/gcc/config/nds32/nds32-fpu.md
@@ -1,5 +1,5 @@
 ;; Machine description of Andes NDS32 cpu for GNU compiler
-;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
 ;; Contributed by Andes Technology Corporation.
 ;;
 ;; This file is part of GCC.
diff --git a/gcc/config/nds32/nds32-graywolf.md b/gcc/config/nds32/nds32-graywolf.md
new file mode 100644
index 0000000..f0c98a6
--- /dev/null
+++ b/gcc/config/nds32/nds32-graywolf.md
@@ -0,0 +1,471 @@
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; ------------------------------------------------------------------------
+;; Define Graywolf pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_graywolf_machine")
+;; Fourteen cpu units, all in this one automaton: gw_ii_0/1, ex/mm/wb for p0 and p1, and iq/rf/e1..e4 for p2 (presumably pipeline stages -- confirm).
+(define_cpu_unit "gw_ii_0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ii_1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ex_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_mm_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_wb_p0" "nds32_graywolf_machine")
+(define_cpu_unit "gw_ex_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_mm_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_wb_p1" "nds32_graywolf_machine")
+(define_cpu_unit "gw_iq_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_rf_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e1_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e2_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e3_p2" "nds32_graywolf_machine")
+(define_cpu_unit "gw_e4_p2" "nds32_graywolf_machine")
+;; "a | b" reserves either unit: gw_ii, gw_ex, gw_mm and gw_wb each accept either of their two units.
+(define_reservation "gw_ii" "gw_ii_0 | gw_ii_1")
+(define_reservation "gw_ex" "gw_ex_p0 | gw_ex_p1")
+(define_reservation "gw_mm" "gw_mm_p0 | gw_mm_p1")
+(define_reservation "gw_wb" "gw_wb_p0 | gw_wb_p1")
+;; "a + b" reserves both units in the same cycle: gw_ii_all occupies gw_ii_0 and gw_ii_1 together.
+(define_reservation "gw_ii_all" "gw_ii_0 + gw_ii_1")
+
+(define_insn_reservation "nds_gw_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_mmu" 1 + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_alu" 1 + (and (and (eq_attr "type" "alu") + (match_test "!nds32::movd44_insn_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_movd44" 1 + (and (and (eq_attr "type" "alu") + (match_test "nds32::movd44_insn_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_alu_shift" 1 + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex*2, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_pbsad" 1 + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex*3, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_pbsada" 1 + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex*3, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_load" 1 + (and (and (eq_attr "type" "load") + (match_test "!nds32::post_update_insn_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_2w" 1 + (and (and (eq_attr "type" "load") + (match_test "nds32::post_update_insn_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store" 1 + (and (and (eq_attr "type" "store") + (match_test "!nds32::store_offset_reg_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation 
"nds_gw_store_3r" 1 + (and (and (eq_attr "type" "store") + (match_test "nds32::store_offset_reg_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_all, gw_ex_p1, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_2" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" 
"12")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_2" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*2, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*3, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, gw_wb_p1") + +(define_insn_reservation "nds_gw_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_1, gw_ex_p1*4, gw_mm_p1, 
gw_wb_p1") + +(define_insn_reservation "nds_gw_mul_fast1" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "graywolf"))) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_mul_fast2" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "graywolf"))) + "gw_ii_0, gw_ex_p0*2, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_mul_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "graywolf"))) + "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_mac_fast1" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_FAST_1") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "graywolf"))) + "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_mac_fast2" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_FAST_2") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "graywolf"))) + "gw_ii_all, gw_ex_p0*2, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_mac_slow" 1 + (and (match_test "nds32_mul_config == MUL_TYPE_SLOW") + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "graywolf"))) + "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_div" 1 + (and (and (eq_attr "type" "div") + (match_test "!nds32::divmod_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_div_2w" 1 + (and (and (eq_attr "type" "div") + (match_test "nds32::divmod_p (insn)")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_all, gw_ex_p0*4, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_alu" 1 + (and (eq_attr "type" 
"dalu") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ex, gw_mm, gw_wb") + +(define_insn_reservation "nds_gw_dsp_alu64" 1 + (and (eq_attr "type" "dalu64") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_alu_round" 1 + (and (eq_attr "type" "daluround") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_cmp" 1 + (and (eq_attr "type" "dcmp") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_clip" 1 + (and (eq_attr "type" "dclip") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_mul" 1 + (and (eq_attr "type" "dmul") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_mac" 1 + (and (eq_attr "type" "dmac") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_insb" 1 + (and (eq_attr "type" "dinsb") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_pack" 1 + (and (eq_attr "type" "dpack") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_bpick" 1 + (and (eq_attr "type" "dbpick") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_0, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_dsp_wext" 1 + (and (eq_attr "type" "dwext") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii_all, gw_ex_p0, gw_mm_p0, gw_wb_p0") + +(define_insn_reservation "nds_gw_fpu_alu" 4 + (and (eq_attr "type" "falu") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_muls" 4 + (and (eq_attr "type" 
"fmuls") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_muld" 4 + (and (eq_attr "type" "fmuld") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_macs" 4 + (and (eq_attr "type" "fmacs") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*3, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_macd" 4 + (and (eq_attr "type" "fmacd") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*4, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_divs" 4 + (and (ior (eq_attr "type" "fdivs") + (eq_attr "type" "fsqrts")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*14, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_divd" 4 + (and (ior (eq_attr "type" "fdivd") + (eq_attr "type" "fsqrtd")) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2*28, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_fast_alu" 2 + (and (ior (eq_attr "type" "fcmp") + (ior (eq_attr "type" "fabs") + (ior (eq_attr "type" "fcpy") + (eq_attr "type" "fcmov")))) + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_fmtsr" 1 + (and (eq_attr "type" "fmtsr") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_fmtdr" 1 + (and (eq_attr "type" "fmtdr") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_fmfsr" 1 + (and (eq_attr "type" "fmfsr") + (eq_attr 
"pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_fmfdr" 1 + (and (eq_attr "type" "fmfdr") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_ii+gw_iq_p2, gw_iq_p2+gw_rf_p2, gw_rf_p2+gw_e1_p2, gw_e1_p2+gw_e2_p2, gw_e2_p2+gw_e3_p2, gw_e3_p2+gw_e4_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_load" 3 + (and (eq_attr "type" "fload") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +(define_insn_reservation "nds_gw_fpu_store" 1 + (and (eq_attr "type" "fstore") + (eq_attr "pipeline_model" "graywolf")) + "gw_ii, gw_iq_p2, gw_rf_p2, gw_e1_p2, gw_e2_p2, gw_e3_p2, gw_e4_p2") + +;; FPU_ADDR_OUT -> FPU_ADDR_IN +;; Main pipeline rules don't need this because those default latency is 1. +(define_bypass 1 + "nds_gw_fpu_load, nds_gw_fpu_store" + "nds_gw_fpu_load, nds_gw_fpu_store" + "nds32_gw_ex_to_ex_p" +) + +;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, +;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb +(define_bypass 2 + "nds_gw_load, nds_gw_load_2w,\ + nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ + nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ + nds_gw_div, nds_gw_div_2w,\ + nds_gw_dsp_alu64, nds_gw_dsp_mul, nds_gw_dsp_mac,\ + nds_gw_dsp_alu_round, nds_gw_dsp_bpick, nds_gw_dsp_wext" + "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\ + nds_gw_pbsad, nds_gw_pbsada,\ + nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ + nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ + nds_gw_branch,\ + nds_gw_div, nds_gw_div_2w,\ + nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\ + nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ + nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ + 
nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\ + nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\ + nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\ + nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\ + nds_gw_mmu,\ + nds_gw_dsp_alu, nds_gw_dsp_alu_round,\ + nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\ + nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\ + nds_gw_dsp_wext, nds_gw_dsp_bpick" + "nds32_gw_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU +;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb +(define_bypass 2 + "nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ + nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ + nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12" + "nds_gw_alu, nds_gw_movd44, nds_gw_alu_shift,\ + nds_gw_pbsad, nds_gw_pbsada,\ + nds_gw_mul_fast1, nds_gw_mul_fast2, nds_gw_mul_slow,\ + nds_gw_mac_fast1, nds_gw_mac_fast2, nds_gw_mac_slow,\ + nds_gw_branch,\ + nds_gw_div, nds_gw_div_2w,\ + nds_gw_load, nds_gw_load_2w, nds_gw_store, nds_gw_store_3r,\ + nds_gw_load_multiple_1,nds_gw_load_multiple_2, nds_gw_load_multiple_3,\ + nds_gw_load_multiple_4,nds_gw_load_multiple_5, nds_gw_load_multiple_6,\ + nds_gw_load_multiple_7,nds_gw_load_multiple_8, nds_gw_load_multiple_12,\ + nds_gw_store_multiple_1,nds_gw_store_multiple_2, nds_gw_store_multiple_3,\ + nds_gw_store_multiple_4,nds_gw_store_multiple_5, nds_gw_store_multiple_6,\ + nds_gw_store_multiple_7,nds_gw_store_multiple_8, nds_gw_store_multiple_12,\ + nds_gw_mmu,\ + nds_gw_dsp_alu, nds_gw_dsp_alu_round,\ + nds_gw_dsp_mul, nds_gw_dsp_mac, nds_gw_dsp_pack,\ + nds_gw_dsp_insb, nds_gw_dsp_cmp, nds_gw_dsp_clip,\ + nds_gw_dsp_wext, nds_gw_dsp_bpick" + "nds32_gw_last_load_to_ex_p" +) diff --git 
a/gcc/config/nds32/nds32-intrinsic.c b/gcc/config/nds32/nds32-intrinsic.c index b9bb2d9..c2ad927 100644 --- a/gcc/config/nds32/nds32-intrinsic.c +++ b/gcc/config/nds32/nds32-intrinsic.c @@ -519,6 +519,7 @@ static struct builtin_description bdesc_noarg[] = { NDS32_BUILTIN(unspec_fmfcfg, "fmfcfg", FMFCFG) NDS32_BUILTIN(unspec_fmfcsr, "fmfcsr", FMFCSR) + NDS32_BUILTIN(unspec_volatile_rdov, "rdov", RDOV) NDS32_BUILTIN(unspec_get_current_sp, "get_current_sp", GET_CURRENT_SP) NDS32_BUILTIN(unspec_return_address, "return_address", RETURN_ADDRESS) NDS32_BUILTIN(unspec_get_all_pending_int, "get_all_pending_int", @@ -558,6 +559,31 @@ static struct builtin_description bdesc_1arg[] = NDS32_NO_TARGET_BUILTIN(unspec_ret_itoff, "ret_itoff", RET_ITOFF) NDS32_NO_TARGET_BUILTIN(unspec_set_current_sp, "set_current_sp", SET_CURRENT_SP) + NDS32_BUILTIN(kabsv2hi2, "kabs16", KABS16) + NDS32_BUILTIN(kabsv2hi2, "v_kabs16", V_KABS16) + NDS32_BUILTIN(kabsv4qi2, "kabs8", KABS8) + NDS32_BUILTIN(kabsv4qi2, "v_kabs8", V_KABS8) + NDS32_BUILTIN(sunpkd810, "sunpkd810", SUNPKD810) + NDS32_BUILTIN(sunpkd810, "v_sunpkd810", V_SUNPKD810) + NDS32_BUILTIN(sunpkd820, "sunpkd820", SUNPKD820) + NDS32_BUILTIN(sunpkd820, "v_sunpkd820", V_SUNPKD820) + NDS32_BUILTIN(sunpkd830, "sunpkd830", SUNPKD830) + NDS32_BUILTIN(sunpkd830, "v_sunpkd830", V_SUNPKD830) + NDS32_BUILTIN(sunpkd831, "sunpkd831", SUNPKD831) + NDS32_BUILTIN(sunpkd831, "v_sunpkd831", V_SUNPKD831) + NDS32_BUILTIN(zunpkd810, "zunpkd810", ZUNPKD810) + NDS32_BUILTIN(zunpkd810, "v_zunpkd810", V_ZUNPKD810) + NDS32_BUILTIN(zunpkd820, "zunpkd820", ZUNPKD820) + NDS32_BUILTIN(zunpkd820, "v_zunpkd820", V_ZUNPKD820) + NDS32_BUILTIN(zunpkd830, "zunpkd830", ZUNPKD830) + NDS32_BUILTIN(zunpkd830, "v_zunpkd830", V_ZUNPKD830) + NDS32_BUILTIN(zunpkd831, "zunpkd831", ZUNPKD831) + NDS32_BUILTIN(zunpkd831, "v_zunpkd831", V_ZUNPKD831) + NDS32_BUILTIN(unspec_kabs, "kabs", KABS) + NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_u16x2", UALOAD_U16) + 
NDS32_BUILTIN(unaligned_loadv2hi, "get_unaligned_s16x2", UALOAD_S16) + NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_u8x4", UALOAD_U8) + NDS32_BUILTIN(unaligned_loadv4qi, "get_unaligned_s8x4", UALOAD_S8) }; /* Intrinsics that take just one argument. and the argument is immediate. */ @@ -593,6 +619,28 @@ static struct builtin_description bdesc_2arg[] = NDS32_BUILTIN(unspec_ffb, "ffb", FFB) NDS32_BUILTIN(unspec_ffmism, "ffmsim", FFMISM) NDS32_BUILTIN(unspec_flmism, "flmism", FLMISM) + NDS32_BUILTIN(unspec_kaddw, "kaddw", KADDW) + NDS32_BUILTIN(unspec_kaddh, "kaddh", KADDH) + NDS32_BUILTIN(unspec_ksubw, "ksubw", KSUBW) + NDS32_BUILTIN(unspec_ksubh, "ksubh", KSUBH) + NDS32_BUILTIN(unspec_kdmbb, "kdmbb", KDMBB) + NDS32_BUILTIN(unspec_kdmbb, "v_kdmbb", V_KDMBB) + NDS32_BUILTIN(unspec_kdmbt, "kdmbt", KDMBT) + NDS32_BUILTIN(unspec_kdmbt, "v_kdmbt", V_KDMBT) + NDS32_BUILTIN(unspec_kdmtb, "kdmtb", KDMTB) + NDS32_BUILTIN(unspec_kdmtb, "v_kdmtb", V_KDMTB) + NDS32_BUILTIN(unspec_kdmtt, "kdmtt", KDMTT) + NDS32_BUILTIN(unspec_kdmtt, "v_kdmtt", V_KDMTT) + NDS32_BUILTIN(unspec_khmbb, "khmbb", KHMBB) + NDS32_BUILTIN(unspec_khmbb, "v_khmbb", V_KHMBB) + NDS32_BUILTIN(unspec_khmbt, "khmbt", KHMBT) + NDS32_BUILTIN(unspec_khmbt, "v_khmbt", V_KHMBT) + NDS32_BUILTIN(unspec_khmtb, "khmtb", KHMTB) + NDS32_BUILTIN(unspec_khmtb, "v_khmtb", V_KHMTB) + NDS32_BUILTIN(unspec_khmtt, "khmtt", KHMTT) + NDS32_BUILTIN(unspec_khmtt, "v_khmtt", V_KHMTT) + NDS32_BUILTIN(unspec_kslraw, "kslraw", KSLRAW) + NDS32_BUILTIN(unspec_kslrawu, "kslraw_u", KSLRAW_U) NDS32_BUILTIN(rotrsi3, "rotr", ROTR) NDS32_BUILTIN(unspec_sva, "sva", SVA) NDS32_BUILTIN(unspec_svs, "svs", SVS) @@ -603,7 +651,202 @@ static struct builtin_description bdesc_2arg[] = NDS32_NO_TARGET_BUILTIN(unaligned_store_hw, "unaligned_store_hw", UASTORE_HW) NDS32_NO_TARGET_BUILTIN(unaligned_storesi, "unaligned_store_hw", UASTORE_W) NDS32_NO_TARGET_BUILTIN(unaligned_storedi, "unaligned_store_hw", UASTORE_DW) - + NDS32_BUILTIN(addv2hi3, "add16", 
ADD16) + NDS32_BUILTIN(addv2hi3, "v_uadd16", V_UADD16) + NDS32_BUILTIN(addv2hi3, "v_sadd16", V_SADD16) + NDS32_BUILTIN(raddv2hi3, "radd16", RADD16) + NDS32_BUILTIN(raddv2hi3, "v_radd16", V_RADD16) + NDS32_BUILTIN(uraddv2hi3, "uradd16", URADD16) + NDS32_BUILTIN(uraddv2hi3, "v_uradd16", V_URADD16) + NDS32_BUILTIN(kaddv2hi3, "kadd16", KADD16) + NDS32_BUILTIN(kaddv2hi3, "v_kadd16", V_KADD16) + NDS32_BUILTIN(ukaddv2hi3, "ukadd16", UKADD16) + NDS32_BUILTIN(ukaddv2hi3, "v_ukadd16", V_UKADD16) + NDS32_BUILTIN(subv2hi3, "sub16", SUB16) + NDS32_BUILTIN(subv2hi3, "v_usub16", V_USUB16) + NDS32_BUILTIN(subv2hi3, "v_ssub16", V_SSUB16) + NDS32_BUILTIN(rsubv2hi3, "rsub16", RSUB16) + NDS32_BUILTIN(rsubv2hi3, "v_rsub16", V_RSUB16) + NDS32_BUILTIN(ursubv2hi3, "ursub16", URSUB16) + NDS32_BUILTIN(ursubv2hi3, "v_ursub16", V_URSUB16) + NDS32_BUILTIN(ksubv2hi3, "ksub16", KSUB16) + NDS32_BUILTIN(ksubv2hi3, "v_ksub16", V_KSUB16) + NDS32_BUILTIN(uksubv2hi3, "uksub16", UKSUB16) + NDS32_BUILTIN(uksubv2hi3, "v_uksub16", V_UKSUB16) + NDS32_BUILTIN(cras16_1, "cras16", CRAS16) + NDS32_BUILTIN(cras16_1, "v_ucras16", V_UCRAS16) + NDS32_BUILTIN(cras16_1, "v_scras16", V_SCRAS16) + NDS32_BUILTIN(rcras16_1, "rcras16", RCRAS16) + NDS32_BUILTIN(rcras16_1, "v_rcras16", V_RCRAS16) + NDS32_BUILTIN(urcras16_1, "urcras16", URCRAS16) + NDS32_BUILTIN(urcras16_1, "v_urcras16", V_URCRAS16) + NDS32_BUILTIN(kcras16_1, "kcras16", KCRAS16) + NDS32_BUILTIN(kcras16_1, "v_kcras16", V_KCRAS16) + NDS32_BUILTIN(ukcras16_1, "ukcras16", UKCRAS16) + NDS32_BUILTIN(ukcras16_1, "v_ukcras16", V_UKCRAS16) + NDS32_BUILTIN(crsa16_1, "crsa16", CRSA16) + NDS32_BUILTIN(crsa16_1, "v_ucrsa16", V_UCRSA16) + NDS32_BUILTIN(crsa16_1, "v_scrsa16", V_SCRSA16) + NDS32_BUILTIN(rcrsa16_1, "rcrsa16", RCRSA16) + NDS32_BUILTIN(rcrsa16_1, "v_rcrsa16", V_RCRSA16) + NDS32_BUILTIN(urcrsa16_1, "urcrsa16", URCRSA16) + NDS32_BUILTIN(urcrsa16_1, "v_urcrsa16", V_URCRSA16) + NDS32_BUILTIN(kcrsa16_1, "kcrsa16", KCRSA16) + NDS32_BUILTIN(kcrsa16_1, "v_kcrsa16", 
V_KCRSA16) + NDS32_BUILTIN(ukcrsa16_1, "ukcrsa16", UKCRSA16) + NDS32_BUILTIN(ukcrsa16_1, "v_ukcrsa16", V_UKCRSA16) + NDS32_BUILTIN(addv4qi3, "add8", ADD8) + NDS32_BUILTIN(addv4qi3, "v_uadd8", V_UADD8) + NDS32_BUILTIN(addv4qi3, "v_sadd8", V_SADD8) + NDS32_BUILTIN(raddv4qi3, "radd8", RADD8) + NDS32_BUILTIN(raddv4qi3, "v_radd8", V_RADD8) + NDS32_BUILTIN(uraddv4qi3, "uradd8", URADD8) + NDS32_BUILTIN(uraddv4qi3, "v_uradd8", V_URADD8) + NDS32_BUILTIN(kaddv4qi3, "kadd8", KADD8) + NDS32_BUILTIN(kaddv4qi3, "v_kadd8", V_KADD8) + NDS32_BUILTIN(ukaddv4qi3, "ukadd8", UKADD8) + NDS32_BUILTIN(ukaddv4qi3, "v_ukadd8", V_UKADD8) + NDS32_BUILTIN(subv4qi3, "sub8", SUB8) + NDS32_BUILTIN(subv4qi3, "v_usub8", V_USUB8) + NDS32_BUILTIN(subv4qi3, "v_ssub8", V_SSUB8) + NDS32_BUILTIN(rsubv4qi3, "rsub8", RSUB8) + NDS32_BUILTIN(rsubv4qi3, "v_rsub8", V_RSUB8) + NDS32_BUILTIN(ursubv4qi3, "ursub8", URSUB8) + NDS32_BUILTIN(ursubv4qi3, "v_ursub8", V_URSUB8) + NDS32_BUILTIN(ksubv4qi3, "ksub8", KSUB8) + NDS32_BUILTIN(ksubv4qi3, "v_ksub8", V_KSUB8) + NDS32_BUILTIN(uksubv4qi3, "uksub8", UKSUB8) + NDS32_BUILTIN(uksubv4qi3, "v_uksub8", V_UKSUB8) + NDS32_BUILTIN(ashrv2hi3, "sra16", SRA16) + NDS32_BUILTIN(ashrv2hi3, "v_sra16", V_SRA16) + NDS32_BUILTIN(sra16_round, "sra16_u", SRA16_U) + NDS32_BUILTIN(sra16_round, "v_sra16_u", V_SRA16_U) + NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16) + NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16) + NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) + NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) + NDS32_BUILTIN(ashlv2hi3, "sll16", SLL16) + NDS32_BUILTIN(ashlv2hi3, "v_sll16", V_SLL16) + NDS32_BUILTIN(kslli16, "ksll16", KSLL16) + NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) + NDS32_BUILTIN(kslra16, "kslra16", KSLRA16) + NDS32_BUILTIN(kslra16, "v_kslra16", V_KSLRA16) + NDS32_BUILTIN(kslra16_round, "kslra16_u", KSLRA16_U) + NDS32_BUILTIN(kslra16_round, "v_kslra16_u", V_KSLRA16_U) + NDS32_BUILTIN(cmpeq16, "cmpeq16", CMPEQ16) + NDS32_BUILTIN(cmpeq16, "v_scmpeq16", V_SCMPEQ16) + 
NDS32_BUILTIN(cmpeq16, "v_ucmpeq16", V_UCMPEQ16) + NDS32_BUILTIN(scmplt16, "scmplt16", SCMPLT16) + NDS32_BUILTIN(scmplt16, "v_scmplt16", V_SCMPLT16) + NDS32_BUILTIN(scmple16, "scmple16", SCMPLE16) + NDS32_BUILTIN(scmple16, "v_scmple16", V_SCMPLE16) + NDS32_BUILTIN(ucmplt16, "ucmplt16", UCMPLT16) + NDS32_BUILTIN(ucmplt16, "v_ucmplt16", V_UCMPLT16) + NDS32_BUILTIN(ucmple16, "ucmple16", UCMPLE16) + NDS32_BUILTIN(ucmple16, "v_ucmple16", V_UCMPLE16) + NDS32_BUILTIN(cmpeq8, "cmpeq8", CMPEQ8) + NDS32_BUILTIN(cmpeq8, "v_scmpeq8", V_SCMPEQ8) + NDS32_BUILTIN(cmpeq8, "v_ucmpeq8", V_UCMPEQ8) + NDS32_BUILTIN(scmplt8, "scmplt8", SCMPLT8) + NDS32_BUILTIN(scmplt8, "v_scmplt8", V_SCMPLT8) + NDS32_BUILTIN(scmple8, "scmple8", SCMPLE8) + NDS32_BUILTIN(scmple8, "v_scmple8", V_SCMPLE8) + NDS32_BUILTIN(ucmplt8, "ucmplt8", UCMPLT8) + NDS32_BUILTIN(ucmplt8, "v_ucmplt8", V_UCMPLT8) + NDS32_BUILTIN(ucmple8, "ucmple8", UCMPLE8) + NDS32_BUILTIN(ucmple8, "v_ucmple8", V_UCMPLE8) + NDS32_BUILTIN(sminv2hi3, "smin16", SMIN16) + NDS32_BUILTIN(sminv2hi3, "v_smin16", V_SMIN16) + NDS32_BUILTIN(uminv2hi3, "umin16", UMIN16) + NDS32_BUILTIN(uminv2hi3, "v_umin16", V_UMIN16) + NDS32_BUILTIN(smaxv2hi3, "smax16", SMAX16) + NDS32_BUILTIN(smaxv2hi3, "v_smax16", V_SMAX16) + NDS32_BUILTIN(umaxv2hi3, "umax16", UMAX16) + NDS32_BUILTIN(umaxv2hi3, "v_umax16", V_UMAX16) + NDS32_BUILTIN(khm16, "khm16", KHM16) + NDS32_BUILTIN(khm16, "v_khm16", V_KHM16) + NDS32_BUILTIN(khmx16, "khmx16", KHMX16) + NDS32_BUILTIN(khmx16, "v_khmx16", V_KHMX16) + NDS32_BUILTIN(sminv4qi3, "smin8", SMIN8) + NDS32_BUILTIN(sminv4qi3, "v_smin8", V_SMIN8) + NDS32_BUILTIN(uminv4qi3, "umin8", UMIN8) + NDS32_BUILTIN(uminv4qi3, "v_umin8", V_UMIN8) + NDS32_BUILTIN(smaxv4qi3, "smax8", SMAX8) + NDS32_BUILTIN(smaxv4qi3, "v_smax8", V_SMAX8) + NDS32_BUILTIN(umaxv4qi3, "umax8", UMAX8) + NDS32_BUILTIN(umaxv4qi3, "v_umax8", V_UMAX8) + NDS32_BUILTIN(raddsi3, "raddw", RADDW) + NDS32_BUILTIN(uraddsi3, "uraddw", URADDW) + NDS32_BUILTIN(rsubsi3, "rsubw", RSUBW) + 
NDS32_BUILTIN(ursubsi3, "ursubw", URSUBW) + NDS32_BUILTIN(sraiu, "sra_u", SRA_U) + NDS32_BUILTIN(kssl, "ksll", KSLL) + NDS32_BUILTIN(pkbb, "pkbb16", PKBB16) + NDS32_BUILTIN(pkbb, "v_pkbb16", V_PKBB16) + NDS32_BUILTIN(pkbt, "pkbt16", PKBT16) + NDS32_BUILTIN(pkbt, "v_pkbt16", V_PKBT16) + NDS32_BUILTIN(pktb, "pktb16", PKTB16) + NDS32_BUILTIN(pktb, "v_pktb16", V_PKTB16) + NDS32_BUILTIN(pktt, "pktt16", PKTT16) + NDS32_BUILTIN(pktt, "v_pktt16", V_PKTT16) + NDS32_BUILTIN(smulsi3_highpart, "smmul", SMMUL) + NDS32_BUILTIN(smmul_round, "smmul_u", SMMUL_U) + NDS32_BUILTIN(smmwb, "smmwb", SMMWB) + NDS32_BUILTIN(smmwb, "v_smmwb", V_SMMWB) + NDS32_BUILTIN(smmwb_round, "smmwb_u", SMMWB_U) + NDS32_BUILTIN(smmwb_round, "v_smmwb_u", V_SMMWB_U) + NDS32_BUILTIN(smmwt, "smmwt", SMMWT) + NDS32_BUILTIN(smmwt, "v_smmwt", V_SMMWT) + NDS32_BUILTIN(smmwt_round, "smmwt_u", SMMWT_U) + NDS32_BUILTIN(smmwt_round, "v_smmwt_u", V_SMMWT_U) + NDS32_BUILTIN(smbb, "smbb", SMBB) + NDS32_BUILTIN(smbb, "v_smbb", V_SMBB) + NDS32_BUILTIN(smbt, "smbt", SMBT) + NDS32_BUILTIN(smbt, "v_smbt", V_SMBT) + NDS32_BUILTIN(smtt, "smtt", SMTT) + NDS32_BUILTIN(smtt, "v_smtt", V_SMTT) + NDS32_BUILTIN(kmda, "kmda", KMDA) + NDS32_BUILTIN(kmda, "v_kmda", V_KMDA) + NDS32_BUILTIN(kmxda, "kmxda", KMXDA) + NDS32_BUILTIN(kmxda, "v_kmxda", V_KMXDA) + NDS32_BUILTIN(smds, "smds", SMDS) + NDS32_BUILTIN(smds, "v_smds", V_SMDS) + NDS32_BUILTIN(smdrs, "smdrs", SMDRS) + NDS32_BUILTIN(smdrs, "v_smdrs", V_SMDRS) + NDS32_BUILTIN(smxdsv, "smxds", SMXDS) + NDS32_BUILTIN(smxdsv, "v_smxds", V_SMXDS) + NDS32_BUILTIN(smal1, "smal", SMAL) + NDS32_BUILTIN(smal1, "v_smal", V_SMAL) + NDS32_BUILTIN(bitrev, "bitrev", BITREV) + NDS32_BUILTIN(wext, "wext", WEXT) + NDS32_BUILTIN(adddi3, "sadd64", SADD64) + NDS32_BUILTIN(adddi3, "uadd64", UADD64) + NDS32_BUILTIN(radddi3, "radd64", RADD64) + NDS32_BUILTIN(uradddi3, "uradd64", URADD64) + NDS32_BUILTIN(kadddi3, "kadd64", KADD64) + NDS32_BUILTIN(ukadddi3, "ukadd64", UKADD64) + NDS32_BUILTIN(subdi3, "ssub64", 
SSUB64) + NDS32_BUILTIN(subdi3, "usub64", USUB64) + NDS32_BUILTIN(rsubdi3, "rsub64", RSUB64) + NDS32_BUILTIN(ursubdi3, "ursub64", URSUB64) + NDS32_BUILTIN(ksubdi3, "ksub64", KSUB64) + NDS32_BUILTIN(uksubdi3, "uksub64", UKSUB64) + NDS32_BUILTIN(smul16, "smul16", SMUL16) + NDS32_BUILTIN(smul16, "v_smul16", V_SMUL16) + NDS32_BUILTIN(smulx16, "smulx16", SMULX16) + NDS32_BUILTIN(smulx16, "v_smulx16", V_SMULX16) + NDS32_BUILTIN(umul16, "umul16", UMUL16) + NDS32_BUILTIN(umul16, "v_umul16", V_UMUL16) + NDS32_BUILTIN(umulx16, "umulx16", UMULX16) + NDS32_BUILTIN(umulx16, "v_umulx16", V_UMULX16) + NDS32_BUILTIN(kwmmul, "kwmmul", KWMMUL) + NDS32_BUILTIN(kwmmul_round, "kwmmul_u", KWMMUL_U) + NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, + "put_unaligned_u16x2", UASTORE_U16) + NDS32_NO_TARGET_BUILTIN(unaligned_storev2hi, + "put_unaligned_s16x2", UASTORE_S16) + NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_u8x4", UASTORE_U8) + NDS32_NO_TARGET_BUILTIN(unaligned_storev4qi, "put_unaligned_s8x4", UASTORE_S8) }; /* Two-argument intrinsics with an immediate second argument. 
*/ @@ -617,6 +860,22 @@ static struct builtin_description bdesc_2argimm[] = NDS32_BUILTIN(unspec_clips, "clips", CLIPS) NDS32_NO_TARGET_BUILTIN(unspec_teqz, "teqz", TEQZ) NDS32_NO_TARGET_BUILTIN(unspec_tnez, "tnez", TNEZ) + NDS32_BUILTIN(lshrv2hi3, "srl16", SRL16) + NDS32_BUILTIN(lshrv2hi3, "v_srl16", V_SRL16) + NDS32_BUILTIN(srl16_round, "srl16_u", SRL16_U) + NDS32_BUILTIN(srl16_round, "v_srl16_u", V_SRL16_U) + NDS32_BUILTIN(kslli16, "ksll16", KSLL16) + NDS32_BUILTIN(kslli16, "v_ksll16", V_KSLL16) + NDS32_BUILTIN(sclip16, "sclip16", SCLIP16) + NDS32_BUILTIN(sclip16, "v_sclip16", V_SCLIP16) + NDS32_BUILTIN(uclip16, "uclip16", UCLIP16) + NDS32_BUILTIN(uclip16, "v_uclip16", V_UCLIP16) + NDS32_BUILTIN(sraiu, "sra_u", SRA_U) + NDS32_BUILTIN(kssl, "ksll", KSLL) + NDS32_BUILTIN(bitrev, "bitrev", BITREV) + NDS32_BUILTIN(wext, "wext", WEXT) + NDS32_BUILTIN(uclip32, "uclip32", UCLIP32) + NDS32_BUILTIN(sclip32, "sclip32", SCLIP32) }; /* Intrinsics that take three arguments. */ @@ -625,6 +884,67 @@ static struct builtin_description bdesc_3arg[] = NDS32_BUILTIN(unspec_pbsada, "pbsada", PBSADA) NDS32_NO_TARGET_BUILTIN(bse, "bse", BSE) NDS32_NO_TARGET_BUILTIN(bsp, "bsp", BSP) + NDS32_BUILTIN(kmabb, "kmabb", KMABB) + NDS32_BUILTIN(kmabb, "v_kmabb", V_KMABB) + NDS32_BUILTIN(kmabt, "kmabt", KMABT) + NDS32_BUILTIN(kmabt, "v_kmabt", V_KMABT) + NDS32_BUILTIN(kmatt, "kmatt", KMATT) + NDS32_BUILTIN(kmatt, "v_kmatt", V_KMATT) + NDS32_BUILTIN(kmada, "kmada", KMADA) + NDS32_BUILTIN(kmada, "v_kmada", V_KMADA) + NDS32_BUILTIN(kmaxda, "kmaxda", KMAXDA) + NDS32_BUILTIN(kmaxda, "v_kmaxda", V_KMAXDA) + NDS32_BUILTIN(kmads, "kmads", KMADS) + NDS32_BUILTIN(kmads, "v_kmads", V_KMADS) + NDS32_BUILTIN(kmadrs, "kmadrs", KMADRS) + NDS32_BUILTIN(kmadrs, "v_kmadrs", V_KMADRS) + NDS32_BUILTIN(kmaxds, "kmaxds", KMAXDS) + NDS32_BUILTIN(kmaxds, "v_kmaxds", V_KMAXDS) + NDS32_BUILTIN(kmsda, "kmsda", KMSDA) + NDS32_BUILTIN(kmsda, "v_kmsda", V_KMSDA) + NDS32_BUILTIN(kmsxda, "kmsxda", KMSXDA) + 
NDS32_BUILTIN(kmsxda, "v_kmsxda", V_KMSXDA) + NDS32_BUILTIN(bpick1, "bpick", BPICK) + NDS32_BUILTIN(smar64_1, "smar64", SMAR64) + NDS32_BUILTIN(smsr64, "smsr64", SMSR64) + NDS32_BUILTIN(umar64_1, "umar64", UMAR64) + NDS32_BUILTIN(umsr64, "umsr64", UMSR64) + NDS32_BUILTIN(kmar64_1, "kmar64", KMAR64) + NDS32_BUILTIN(kmsr64, "kmsr64", KMSR64) + NDS32_BUILTIN(ukmar64_1, "ukmar64", UKMAR64) + NDS32_BUILTIN(ukmsr64, "ukmsr64", UKMSR64) + NDS32_BUILTIN(smalbb, "smalbb", SMALBB) + NDS32_BUILTIN(smalbb, "v_smalbb", V_SMALBB) + NDS32_BUILTIN(smalbt, "smalbt", SMALBT) + NDS32_BUILTIN(smalbt, "v_smalbt", V_SMALBT) + NDS32_BUILTIN(smaltt, "smaltt", SMALTT) + NDS32_BUILTIN(smaltt, "v_smaltt", V_SMALTT) + NDS32_BUILTIN(smalda1, "smalda", SMALDA) + NDS32_BUILTIN(smalda1, "v_smalda", V_SMALDA) + NDS32_BUILTIN(smalxda1, "smalxda", SMALXDA) + NDS32_BUILTIN(smalxda1, "v_smalxda", V_SMALXDA) + NDS32_BUILTIN(smalds1, "smalds", SMALDS) + NDS32_BUILTIN(smalds1, "v_smalds", V_SMALDS) + NDS32_BUILTIN(smaldrs3, "smaldrs", SMALDRS) + NDS32_BUILTIN(smaldrs3, "v_smaldrs", V_SMALDRS) + NDS32_BUILTIN(smalxds1, "smalxds", SMALXDS) + NDS32_BUILTIN(smalxds1, "v_smalxds", V_SMALXDS) + NDS32_BUILTIN(smslda1, "smslda", SMSLDA) + NDS32_BUILTIN(smslda1, "v_smslda", V_SMSLDA) + NDS32_BUILTIN(smslxda1, "smslxda", SMSLXDA) + NDS32_BUILTIN(smslxda1, "v_smslxda", V_SMSLXDA) + NDS32_BUILTIN(kmmawb, "kmmawb", KMMAWB) + NDS32_BUILTIN(kmmawb, "v_kmmawb", V_KMMAWB) + NDS32_BUILTIN(kmmawb_round, "kmmawb_u", KMMAWB_U) + NDS32_BUILTIN(kmmawb_round, "v_kmmawb_u", V_KMMAWB_U) + NDS32_BUILTIN(kmmawt, "kmmawt", KMMAWT) + NDS32_BUILTIN(kmmawt, "v_kmmawt", V_KMMAWT) + NDS32_BUILTIN(kmmawt_round, "kmmawt_u", KMMAWT_U) + NDS32_BUILTIN(kmmawt_round, "v_kmmawt_u", V_KMMAWT_U) + NDS32_BUILTIN(kmmac, "kmmac", KMMAC) + NDS32_BUILTIN(kmmac_round, "kmmac_u", KMMAC_U) + NDS32_BUILTIN(kmmsb, "kmmsb", KMMSB) + NDS32_BUILTIN(kmmsb_round, "kmmsb_u", KMMSB_U) }; /* Three-argument intrinsics with an immediate third argument. 
*/ @@ -634,6 +954,7 @@ static struct builtin_description bdesc_3argimm[] = NDS32_NO_TARGET_BUILTIN(prefetch_hw, "prefetch_hw", DPREF_HW) NDS32_NO_TARGET_BUILTIN(prefetch_w, "prefetch_w", DPREF_W) NDS32_NO_TARGET_BUILTIN(prefetch_dw, "prefetch_dw", DPREF_DW) + NDS32_BUILTIN(insb, "insb", INSB) }; /* Intrinsics that load a value. */ @@ -676,6 +997,11 @@ nds32_expand_builtin_impl (tree exp, unsigned i; struct builtin_description *d; + if (!NDS32_EXT_DSP_P () + && fcode > NDS32_BUILTIN_DSP_BEGIN + && fcode < NDS32_BUILTIN_DSP_END) + error ("don't support DSP extension instructions"); + switch (fcode) { /* FPU Register Transfer. */ @@ -812,6 +1138,9 @@ nds32_expand_builtin_impl (tree exp, case NDS32_BUILTIN_CCTL_L1D_WBALL_ONE_LVL: emit_insn (gen_cctl_l1d_wball_one_lvl()); return target; + case NDS32_BUILTIN_CLROV: + emit_insn (gen_unspec_volatile_clrov ()); + return target; case NDS32_BUILTIN_STANDBY_NO_WAKE_GRANT: emit_insn (gen_unspec_standby_no_wake_grant ()); return target; @@ -947,10 +1276,18 @@ nds32_init_builtins_impl (void) NDS32_BUILTIN_ ## CODE, BUILT_IN_MD, NULL, NULL_TREE) /* Looking for return type and argument can be found in tree.h file. 
*/ + tree ptr_char_type_node = build_pointer_type (char_type_node); tree ptr_uchar_type_node = build_pointer_type (unsigned_char_type_node); tree ptr_ushort_type_node = build_pointer_type (short_unsigned_type_node); + tree ptr_short_type_node = build_pointer_type (short_integer_type_node); tree ptr_uint_type_node = build_pointer_type (unsigned_type_node); tree ptr_ulong_type_node = build_pointer_type (long_long_unsigned_type_node); + tree v4qi_type_node = build_vector_type (intQI_type_node, 4); + tree u_v4qi_type_node = build_vector_type (unsigned_intQI_type_node, 4); + tree v2hi_type_node = build_vector_type (intHI_type_node, 2); + tree u_v2hi_type_node = build_vector_type (unsigned_intHI_type_node, 2); + tree v2si_type_node = build_vector_type (intSI_type_node, 2); + tree u_v2si_type_node = build_vector_type (unsigned_intSI_type_node, 2); /* Cache. */ ADD_NDS32_BUILTIN1 ("isync", void, ptr_uint, ISYNC); @@ -1050,6 +1387,31 @@ nds32_init_builtins_impl (void) ADD_NDS32_BUILTIN2 ("se_ffmism", integer, unsigned, unsigned, FFMISM); ADD_NDS32_BUILTIN2 ("se_flmism", integer, unsigned, unsigned, FLMISM); + /* SATURATION */ + ADD_NDS32_BUILTIN2 ("kaddw", integer, integer, integer, KADDW); + ADD_NDS32_BUILTIN2 ("ksubw", integer, integer, integer, KSUBW); + ADD_NDS32_BUILTIN2 ("kaddh", integer, integer, integer, KADDH); + ADD_NDS32_BUILTIN2 ("ksubh", integer, integer, integer, KSUBH); + ADD_NDS32_BUILTIN2 ("kdmbb", integer, unsigned, unsigned, KDMBB); + ADD_NDS32_BUILTIN2 ("v_kdmbb", integer, v2hi, v2hi, V_KDMBB); + ADD_NDS32_BUILTIN2 ("kdmbt", integer, unsigned, unsigned, KDMBT); + ADD_NDS32_BUILTIN2 ("v_kdmbt", integer, v2hi, v2hi, V_KDMBT); + ADD_NDS32_BUILTIN2 ("kdmtb", integer, unsigned, unsigned, KDMTB); + ADD_NDS32_BUILTIN2 ("v_kdmtb", integer, v2hi, v2hi, V_KDMTB); + ADD_NDS32_BUILTIN2 ("kdmtt", integer, unsigned, unsigned, KDMTT); + ADD_NDS32_BUILTIN2 ("v_kdmtt", integer, v2hi, v2hi, V_KDMTT); + ADD_NDS32_BUILTIN2 ("khmbb", integer, unsigned, unsigned, KHMBB); + 
ADD_NDS32_BUILTIN2 ("v_khmbb", integer, v2hi, v2hi, V_KHMBB); + ADD_NDS32_BUILTIN2 ("khmbt", integer, unsigned, unsigned, KHMBT); + ADD_NDS32_BUILTIN2 ("v_khmbt", integer, v2hi, v2hi, V_KHMBT); + ADD_NDS32_BUILTIN2 ("khmtb", integer, unsigned, unsigned, KHMTB); + ADD_NDS32_BUILTIN2 ("v_khmtb", integer, v2hi, v2hi, V_KHMTB); + ADD_NDS32_BUILTIN2 ("khmtt", integer, unsigned, unsigned, KHMTT); + ADD_NDS32_BUILTIN2 ("v_khmtt", integer, v2hi, v2hi, V_KHMTT); + ADD_NDS32_BUILTIN2 ("kslraw", integer, integer, integer, KSLRAW); + ADD_NDS32_BUILTIN2 ("kslraw_u", integer, integer, integer, KSLRAW_U); + ADD_NDS32_BUILTIN0 ("rdov", unsigned, RDOV); + ADD_NDS32_BUILTIN0 ("clrov", void, CLROV); /* ROTR */ ADD_NDS32_BUILTIN2 ("rotr", unsigned, unsigned, unsigned, ROTR); @@ -1109,4 +1471,384 @@ nds32_init_builtins_impl (void) ADD_NDS32_BUILTIN0 ("enable_unaligned", void, ENABLE_UNALIGNED); ADD_NDS32_BUILTIN0 ("disable_unaligned", void, DISABLE_UNALIGNED); + /* DSP Extension: SIMD 16bit Add and Subtract. 
*/ + ADD_NDS32_BUILTIN2 ("add16", unsigned, unsigned, unsigned, ADD16); + ADD_NDS32_BUILTIN2 ("v_uadd16", u_v2hi, u_v2hi, u_v2hi, V_UADD16); + ADD_NDS32_BUILTIN2 ("v_sadd16", v2hi, v2hi, v2hi, V_SADD16); + ADD_NDS32_BUILTIN2 ("radd16", unsigned, unsigned, unsigned, RADD16); + ADD_NDS32_BUILTIN2 ("v_radd16", v2hi, v2hi, v2hi, V_RADD16); + ADD_NDS32_BUILTIN2 ("uradd16", unsigned, unsigned, unsigned, URADD16); + ADD_NDS32_BUILTIN2 ("v_uradd16", u_v2hi, u_v2hi, u_v2hi, V_URADD16); + ADD_NDS32_BUILTIN2 ("kadd16", unsigned, unsigned, unsigned, KADD16); + ADD_NDS32_BUILTIN2 ("v_kadd16", v2hi, v2hi, v2hi, V_KADD16); + ADD_NDS32_BUILTIN2 ("ukadd16", unsigned, unsigned, unsigned, UKADD16); + ADD_NDS32_BUILTIN2 ("v_ukadd16", u_v2hi, u_v2hi, u_v2hi, V_UKADD16); + ADD_NDS32_BUILTIN2 ("sub16", unsigned, unsigned, unsigned, SUB16); + ADD_NDS32_BUILTIN2 ("v_usub16", u_v2hi, u_v2hi, u_v2hi, V_USUB16); + ADD_NDS32_BUILTIN2 ("v_ssub16", v2hi, v2hi, v2hi, V_SSUB16); + ADD_NDS32_BUILTIN2 ("rsub16", unsigned, unsigned, unsigned, RSUB16); + ADD_NDS32_BUILTIN2 ("v_rsub16", v2hi, v2hi, v2hi, V_RSUB16); + ADD_NDS32_BUILTIN2 ("ursub16", unsigned, unsigned, unsigned, URSUB16); + ADD_NDS32_BUILTIN2 ("v_ursub16", u_v2hi, u_v2hi, u_v2hi, V_URSUB16); + ADD_NDS32_BUILTIN2 ("ksub16", unsigned, unsigned, unsigned, KSUB16); + ADD_NDS32_BUILTIN2 ("v_ksub16", v2hi, v2hi, v2hi, V_KSUB16); + ADD_NDS32_BUILTIN2 ("uksub16", unsigned, unsigned, unsigned, UKSUB16); + ADD_NDS32_BUILTIN2 ("v_uksub16", u_v2hi, u_v2hi, u_v2hi, V_UKSUB16); + ADD_NDS32_BUILTIN2 ("cras16", unsigned, unsigned, unsigned, CRAS16); + ADD_NDS32_BUILTIN2 ("v_ucras16", u_v2hi, u_v2hi, u_v2hi, V_UCRAS16); + ADD_NDS32_BUILTIN2 ("v_scras16", v2hi, v2hi, v2hi, V_SCRAS16); + ADD_NDS32_BUILTIN2 ("rcras16", unsigned, unsigned, unsigned, RCRAS16); + ADD_NDS32_BUILTIN2 ("v_rcras16", v2hi, v2hi, v2hi, V_RCRAS16); + ADD_NDS32_BUILTIN2 ("urcras16", unsigned, unsigned, unsigned, URCRAS16); + ADD_NDS32_BUILTIN2 ("v_urcras16", u_v2hi, u_v2hi, u_v2hi, 
V_URCRAS16); + ADD_NDS32_BUILTIN2 ("kcras16", unsigned, unsigned, unsigned, KCRAS16); + ADD_NDS32_BUILTIN2 ("v_kcras16", v2hi, v2hi, v2hi, V_KCRAS16); + ADD_NDS32_BUILTIN2 ("ukcras16", unsigned, unsigned, unsigned, UKCRAS16); + ADD_NDS32_BUILTIN2 ("v_ukcras16", u_v2hi, u_v2hi, u_v2hi, V_UKCRAS16); + ADD_NDS32_BUILTIN2 ("crsa16", unsigned, unsigned, unsigned, CRSA16); + ADD_NDS32_BUILTIN2 ("v_ucrsa16", u_v2hi, u_v2hi, u_v2hi, V_UCRSA16); + ADD_NDS32_BUILTIN2 ("v_scrsa16", v2hi, v2hi, v2hi, V_SCRSA16); + ADD_NDS32_BUILTIN2 ("rcrsa16", unsigned, unsigned, unsigned, RCRSA16); + ADD_NDS32_BUILTIN2 ("v_rcrsa16", v2hi, v2hi, v2hi, V_RCRSA16); + ADD_NDS32_BUILTIN2 ("urcrsa16", unsigned, unsigned, unsigned, URCRSA16); + ADD_NDS32_BUILTIN2 ("v_urcrsa16", u_v2hi, u_v2hi, u_v2hi, V_URCRSA16); + ADD_NDS32_BUILTIN2 ("kcrsa16", unsigned, unsigned, unsigned, KCRSA16); + ADD_NDS32_BUILTIN2 ("v_kcrsa16", v2hi, v2hi, v2hi, V_KCRSA16); + ADD_NDS32_BUILTIN2 ("ukcrsa16", unsigned, unsigned, unsigned, UKCRSA16); + ADD_NDS32_BUILTIN2 ("v_ukcrsa16", u_v2hi, u_v2hi, u_v2hi, V_UKCRSA16); + + /* DSP Extension: SIMD 8bit Add and Subtract. 
*/ + ADD_NDS32_BUILTIN2 ("add8", integer, integer, integer, ADD8); + ADD_NDS32_BUILTIN2 ("v_uadd8", u_v4qi, u_v4qi, u_v4qi, V_UADD8); + ADD_NDS32_BUILTIN2 ("v_sadd8", v4qi, v4qi, v4qi, V_SADD8); + ADD_NDS32_BUILTIN2 ("radd8", unsigned, unsigned, unsigned, RADD8); + ADD_NDS32_BUILTIN2 ("v_radd8", v4qi, v4qi, v4qi, V_RADD8); + ADD_NDS32_BUILTIN2 ("uradd8", unsigned, unsigned, unsigned, URADD8); + ADD_NDS32_BUILTIN2 ("v_uradd8", u_v4qi, u_v4qi, u_v4qi, V_URADD8); + ADD_NDS32_BUILTIN2 ("kadd8", unsigned, unsigned, unsigned, KADD8); + ADD_NDS32_BUILTIN2 ("v_kadd8", v4qi, v4qi, v4qi, V_KADD8); + ADD_NDS32_BUILTIN2 ("ukadd8", unsigned, unsigned, unsigned, UKADD8); + ADD_NDS32_BUILTIN2 ("v_ukadd8", u_v4qi, u_v4qi, u_v4qi, V_UKADD8); + ADD_NDS32_BUILTIN2 ("sub8", integer, integer, integer, SUB8); + ADD_NDS32_BUILTIN2 ("v_usub8", u_v4qi, u_v4qi, u_v4qi, V_USUB8); + ADD_NDS32_BUILTIN2 ("v_ssub8", v4qi, v4qi, v4qi, V_SSUB8); + ADD_NDS32_BUILTIN2 ("rsub8", unsigned, unsigned, unsigned, RSUB8); + ADD_NDS32_BUILTIN2 ("v_rsub8", v4qi, v4qi, v4qi, V_RSUB8); + ADD_NDS32_BUILTIN2 ("ursub8", unsigned, unsigned, unsigned, URSUB8); + ADD_NDS32_BUILTIN2 ("v_ursub8", u_v4qi, u_v4qi, u_v4qi, V_URSUB8); + ADD_NDS32_BUILTIN2 ("ksub8", unsigned, unsigned, unsigned, KSUB8); + ADD_NDS32_BUILTIN2 ("v_ksub8", v4qi, v4qi, v4qi, V_KSUB8); + ADD_NDS32_BUILTIN2 ("uksub8", unsigned, unsigned, unsigned, UKSUB8); + ADD_NDS32_BUILTIN2 ("v_uksub8", u_v4qi, u_v4qi, u_v4qi, V_UKSUB8); + + /* DSP Extension: SIMD 16bit Shift. 
*/ + ADD_NDS32_BUILTIN2 ("sra16", unsigned, unsigned, unsigned, SRA16); + ADD_NDS32_BUILTIN2 ("v_sra16", v2hi, v2hi, unsigned, V_SRA16); + ADD_NDS32_BUILTIN2 ("sra16_u", unsigned, unsigned, unsigned, SRA16_U); + ADD_NDS32_BUILTIN2 ("v_sra16_u", v2hi, v2hi, unsigned, V_SRA16_U); + ADD_NDS32_BUILTIN2 ("srl16", unsigned, unsigned, unsigned, SRL16); + ADD_NDS32_BUILTIN2 ("v_srl16", u_v2hi, u_v2hi, unsigned, V_SRL16); + ADD_NDS32_BUILTIN2 ("srl16_u", unsigned, unsigned, unsigned, SRL16_U); + ADD_NDS32_BUILTIN2 ("v_srl16_u", u_v2hi, u_v2hi, unsigned, V_SRL16_U); + ADD_NDS32_BUILTIN2 ("sll16", unsigned, unsigned, unsigned, SLL16); + ADD_NDS32_BUILTIN2 ("v_sll16", u_v2hi, u_v2hi, unsigned, V_SLL16); + ADD_NDS32_BUILTIN2 ("ksll16", unsigned, unsigned, unsigned, KSLL16); + ADD_NDS32_BUILTIN2 ("v_ksll16", v2hi, v2hi, unsigned, V_KSLL16); + ADD_NDS32_BUILTIN2 ("kslra16", unsigned, unsigned, unsigned, KSLRA16); + ADD_NDS32_BUILTIN2 ("v_kslra16", v2hi, v2hi, unsigned, V_KSLRA16); + ADD_NDS32_BUILTIN2 ("kslra16_u", unsigned, unsigned, unsigned, KSLRA16_U); + ADD_NDS32_BUILTIN2 ("v_kslra16_u", v2hi, v2hi, unsigned, V_KSLRA16_U); + + /* DSP Extension: 16bit Compare. 
*/ + ADD_NDS32_BUILTIN2 ("cmpeq16", unsigned, unsigned, unsigned, CMPEQ16); + ADD_NDS32_BUILTIN2 ("v_scmpeq16", u_v2hi, v2hi, v2hi, V_SCMPEQ16); + ADD_NDS32_BUILTIN2 ("v_ucmpeq16", u_v2hi, u_v2hi, u_v2hi, V_UCMPEQ16); + ADD_NDS32_BUILTIN2 ("scmplt16", unsigned, unsigned, unsigned, SCMPLT16); + ADD_NDS32_BUILTIN2 ("v_scmplt16", u_v2hi, v2hi, v2hi, V_SCMPLT16); + ADD_NDS32_BUILTIN2 ("scmple16", unsigned, unsigned, unsigned, SCMPLE16); + ADD_NDS32_BUILTIN2 ("v_scmple16", u_v2hi, v2hi, v2hi, V_SCMPLE16); + ADD_NDS32_BUILTIN2 ("ucmplt16", unsigned, unsigned, unsigned, UCMPLT16); + ADD_NDS32_BUILTIN2 ("v_ucmplt16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLT16); + ADD_NDS32_BUILTIN2 ("ucmple16", unsigned, unsigned, unsigned, UCMPLE16); + ADD_NDS32_BUILTIN2 ("v_ucmple16", u_v2hi, u_v2hi, u_v2hi, V_UCMPLE16); + + /* DSP Extension: 8bit Compare. */ + ADD_NDS32_BUILTIN2 ("cmpeq8", unsigned, unsigned, unsigned, CMPEQ8); + ADD_NDS32_BUILTIN2 ("v_scmpeq8", u_v4qi, v4qi, v4qi, V_SCMPEQ8); + ADD_NDS32_BUILTIN2 ("v_ucmpeq8", u_v4qi, u_v4qi, u_v4qi, V_UCMPEQ8); + ADD_NDS32_BUILTIN2 ("scmplt8", unsigned, unsigned, unsigned, SCMPLT8); + ADD_NDS32_BUILTIN2 ("v_scmplt8", u_v4qi, v4qi, v4qi, V_SCMPLT8); + ADD_NDS32_BUILTIN2 ("scmple8", unsigned, unsigned, unsigned, SCMPLE8); + ADD_NDS32_BUILTIN2 ("v_scmple8", u_v4qi, v4qi, v4qi, V_SCMPLE8); + ADD_NDS32_BUILTIN2 ("ucmplt8", unsigned, unsigned, unsigned, UCMPLT8); + ADD_NDS32_BUILTIN2 ("v_ucmplt8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLT8); + ADD_NDS32_BUILTIN2 ("ucmple8", unsigned, unsigned, unsigned, UCMPLE8); + ADD_NDS32_BUILTIN2 ("v_ucmple8", u_v4qi, u_v4qi, u_v4qi, V_UCMPLE8); + + /* DSP Extension: SIMD 16bit MISC. 
*/ + ADD_NDS32_BUILTIN2 ("smin16", unsigned, unsigned, unsigned, SMIN16); + ADD_NDS32_BUILTIN2 ("v_smin16", v2hi, v2hi, v2hi, V_SMIN16); + ADD_NDS32_BUILTIN2 ("umin16", unsigned, unsigned, unsigned, UMIN16); + ADD_NDS32_BUILTIN2 ("v_umin16", u_v2hi, u_v2hi, u_v2hi, V_UMIN16); + ADD_NDS32_BUILTIN2 ("smax16", unsigned, unsigned, unsigned, SMAX16); + ADD_NDS32_BUILTIN2 ("v_smax16", v2hi, v2hi, v2hi, V_SMAX16); + ADD_NDS32_BUILTIN2 ("umax16", unsigned, unsigned, unsigned, UMAX16); + ADD_NDS32_BUILTIN2 ("v_umax16", u_v2hi, u_v2hi, u_v2hi, V_UMAX16); + ADD_NDS32_BUILTIN2 ("sclip16", unsigned, unsigned, unsigned, SCLIP16); + ADD_NDS32_BUILTIN2 ("v_sclip16", v2hi, v2hi, unsigned, V_SCLIP16); + ADD_NDS32_BUILTIN2 ("uclip16", unsigned, unsigned, unsigned, UCLIP16); + ADD_NDS32_BUILTIN2 ("v_uclip16", v2hi, v2hi, unsigned, V_UCLIP16); + ADD_NDS32_BUILTIN2 ("khm16", unsigned, unsigned, unsigned, KHM16); + ADD_NDS32_BUILTIN2 ("v_khm16", v2hi, v2hi, v2hi, V_KHM16); + ADD_NDS32_BUILTIN2 ("khmx16", unsigned, unsigned, unsigned, KHMX16); + ADD_NDS32_BUILTIN2 ("v_khmx16", v2hi, v2hi, v2hi, V_KHMX16); + ADD_NDS32_BUILTIN1 ("kabs16", unsigned, unsigned, KABS16); + ADD_NDS32_BUILTIN1 ("v_kabs16", v2hi, v2hi, V_KABS16); + ADD_NDS32_BUILTIN2 ("smul16", long_long_unsigned, unsigned, unsigned, SMUL16); + ADD_NDS32_BUILTIN2 ("v_smul16", v2si, v2hi, v2hi, V_SMUL16); + ADD_NDS32_BUILTIN2 ("smulx16", + long_long_unsigned, unsigned, unsigned, SMULX16); + ADD_NDS32_BUILTIN2 ("v_smulx16", v2si, v2hi, v2hi, V_SMULX16); + ADD_NDS32_BUILTIN2 ("umul16", long_long_unsigned, unsigned, unsigned, UMUL16); + ADD_NDS32_BUILTIN2 ("v_umul16", u_v2si, u_v2hi, u_v2hi, V_UMUL16); + ADD_NDS32_BUILTIN2 ("umulx16", + long_long_unsigned, unsigned, unsigned, UMULX16); + ADD_NDS32_BUILTIN2 ("v_umulx16", u_v2si, u_v2hi, u_v2hi, V_UMULX16); + + /* DSP Extension: SIMD 8bit MISC. 
*/ + ADD_NDS32_BUILTIN2 ("smin8", unsigned, unsigned, unsigned, SMIN8); + ADD_NDS32_BUILTIN2 ("v_smin8", v4qi, v4qi, v4qi, V_SMIN8); + ADD_NDS32_BUILTIN2 ("umin8", unsigned, unsigned, unsigned, UMIN8); + ADD_NDS32_BUILTIN2 ("v_umin8", u_v4qi, u_v4qi, u_v4qi, V_UMIN8); + ADD_NDS32_BUILTIN2 ("smax8", unsigned, unsigned, unsigned, SMAX8); + ADD_NDS32_BUILTIN2 ("v_smax8", v4qi, v4qi, v4qi, V_SMAX8); + ADD_NDS32_BUILTIN2 ("umax8", unsigned, unsigned, unsigned, UMAX8); + ADD_NDS32_BUILTIN2 ("v_umax8", u_v4qi, u_v4qi, u_v4qi, V_UMAX8); + ADD_NDS32_BUILTIN1 ("kabs8", unsigned, unsigned, KABS8); + ADD_NDS32_BUILTIN1 ("v_kabs8", v4qi, v4qi, V_KABS8); + + /* DSP Extension: 8bit Unpacking. */ + ADD_NDS32_BUILTIN1 ("sunpkd810", unsigned, unsigned, SUNPKD810); + ADD_NDS32_BUILTIN1 ("v_sunpkd810", v2hi, v4qi, V_SUNPKD810); + ADD_NDS32_BUILTIN1 ("sunpkd820", unsigned, unsigned, SUNPKD820); + ADD_NDS32_BUILTIN1 ("v_sunpkd820", v2hi, v4qi, V_SUNPKD820); + ADD_NDS32_BUILTIN1 ("sunpkd830", unsigned, unsigned, SUNPKD830); + ADD_NDS32_BUILTIN1 ("v_sunpkd830", v2hi, v4qi, V_SUNPKD830); + ADD_NDS32_BUILTIN1 ("sunpkd831", unsigned, unsigned, SUNPKD831); + ADD_NDS32_BUILTIN1 ("v_sunpkd831", v2hi, v4qi, V_SUNPKD831); + ADD_NDS32_BUILTIN1 ("zunpkd810", unsigned, unsigned, ZUNPKD810); + ADD_NDS32_BUILTIN1 ("v_zunpkd810", u_v2hi, u_v4qi, V_ZUNPKD810); + ADD_NDS32_BUILTIN1 ("zunpkd820", unsigned, unsigned, ZUNPKD820); + ADD_NDS32_BUILTIN1 ("v_zunpkd820", u_v2hi, u_v4qi, V_ZUNPKD820); + ADD_NDS32_BUILTIN1 ("zunpkd830", unsigned, unsigned, ZUNPKD830); + ADD_NDS32_BUILTIN1 ("v_zunpkd830", u_v2hi, u_v4qi, V_ZUNPKD830); + ADD_NDS32_BUILTIN1 ("zunpkd831", unsigned, unsigned, ZUNPKD831); + ADD_NDS32_BUILTIN1 ("v_zunpkd831", u_v2hi, u_v4qi, V_ZUNPKD831); + + /* DSP Extension: 32bit Add and Subtract. 
*/ + ADD_NDS32_BUILTIN2 ("raddw", integer, integer, integer, RADDW); + ADD_NDS32_BUILTIN2 ("uraddw", unsigned, unsigned, unsigned, URADDW); + ADD_NDS32_BUILTIN2 ("rsubw", integer, integer, integer, RSUBW); + ADD_NDS32_BUILTIN2 ("ursubw", unsigned, unsigned, unsigned, URSUBW); + + /* DSP Extension: 32bit Shift. */ + ADD_NDS32_BUILTIN2 ("sra_u", integer, integer, unsigned, SRA_U); + ADD_NDS32_BUILTIN2 ("ksll", integer, integer, unsigned, KSLL); + + /* DSP Extension: 16bit Packing. */ + ADD_NDS32_BUILTIN2 ("pkbb16", unsigned, unsigned, unsigned, PKBB16); + ADD_NDS32_BUILTIN2 ("v_pkbb16", u_v2hi, u_v2hi, u_v2hi, V_PKBB16); + ADD_NDS32_BUILTIN2 ("pkbt16", unsigned, unsigned, unsigned, PKBT16); + ADD_NDS32_BUILTIN2 ("v_pkbt16", u_v2hi, u_v2hi, u_v2hi, V_PKBT16); + ADD_NDS32_BUILTIN2 ("pktb16", unsigned, unsigned, unsigned, PKTB16); + ADD_NDS32_BUILTIN2 ("v_pktb16", u_v2hi, u_v2hi, u_v2hi, V_PKTB16); + ADD_NDS32_BUILTIN2 ("pktt16", unsigned, unsigned, unsigned, PKTT16); + ADD_NDS32_BUILTIN2 ("v_pktt16", u_v2hi, u_v2hi, u_v2hi, V_PKTT16); + + /* DSP Extension: Signed MSW 32x32 Multiply and ADD. */ + ADD_NDS32_BUILTIN2 ("smmul", integer, integer, integer, SMMUL); + ADD_NDS32_BUILTIN2 ("smmul_u", integer, integer, integer, SMMUL_U); + ADD_NDS32_BUILTIN3 ("kmmac", integer, integer, integer, integer, KMMAC); + ADD_NDS32_BUILTIN3 ("kmmac_u", integer, integer, integer, integer, KMMAC_U); + ADD_NDS32_BUILTIN3 ("kmmsb", integer, integer, integer, integer, KMMSB); + ADD_NDS32_BUILTIN3 ("kmmsb_u", integer, integer, integer, integer, KMMSB_U); + ADD_NDS32_BUILTIN2 ("kwmmul", integer, integer, integer, KWMMUL); + ADD_NDS32_BUILTIN2 ("kwmmul_u", integer, integer, integer, KWMMUL_U); + + /* DSP Extension: Most Significant Word 32x16 Multiply and ADD. 
*/ + ADD_NDS32_BUILTIN2 ("smmwb", integer, integer, unsigned, SMMWB); + ADD_NDS32_BUILTIN2 ("v_smmwb", integer, integer, v2hi, V_SMMWB); + ADD_NDS32_BUILTIN2 ("smmwb_u", integer, integer, unsigned, SMMWB_U); + ADD_NDS32_BUILTIN2 ("v_smmwb_u", integer, integer, v2hi, V_SMMWB_U); + ADD_NDS32_BUILTIN2 ("smmwt", integer, integer, unsigned, SMMWT); + ADD_NDS32_BUILTIN2 ("v_smmwt", integer, integer, v2hi, V_SMMWT); + ADD_NDS32_BUILTIN2 ("smmwt_u", integer, integer, unsigned, SMMWT_U); + ADD_NDS32_BUILTIN2 ("v_smmwt_u", integer, integer, v2hi, V_SMMWT_U); + ADD_NDS32_BUILTIN3 ("kmmawb", integer, integer, integer, unsigned, KMMAWB); + ADD_NDS32_BUILTIN3 ("v_kmmawb", integer, integer, integer, v2hi, V_KMMAWB); + ADD_NDS32_BUILTIN3 ("kmmawb_u", + integer, integer, integer, unsigned, KMMAWB_U); + ADD_NDS32_BUILTIN3 ("v_kmmawb_u", + integer, integer, integer, v2hi, V_KMMAWB_U); + ADD_NDS32_BUILTIN3 ("kmmawt", integer, integer, integer, unsigned, KMMAWT); + ADD_NDS32_BUILTIN3 ("v_kmmawt", integer, integer, integer, v2hi, V_KMMAWT); + ADD_NDS32_BUILTIN3 ("kmmawt_u", + integer, integer, integer, unsigned, KMMAWT_U); + ADD_NDS32_BUILTIN3 ("v_kmmawt_u", + integer, integer, integer, v2hi, V_KMMAWT_U); + + /* DSP Extension: Signed 16bit Multiply with ADD/Subtract. 
*/ + ADD_NDS32_BUILTIN2 ("smbb", integer, unsigned, unsigned, SMBB); + ADD_NDS32_BUILTIN2 ("v_smbb", integer, v2hi, v2hi, V_SMBB); + ADD_NDS32_BUILTIN2 ("smbt", integer, unsigned, unsigned, SMBT); + ADD_NDS32_BUILTIN2 ("v_smbt", integer, v2hi, v2hi, V_SMBT); + ADD_NDS32_BUILTIN2 ("smtt", integer, unsigned, unsigned, SMTT); + ADD_NDS32_BUILTIN2 ("v_smtt", integer, v2hi, v2hi, V_SMTT); + ADD_NDS32_BUILTIN2 ("kmda", integer, unsigned, unsigned, KMDA); + ADD_NDS32_BUILTIN2 ("v_kmda", integer, v2hi, v2hi, V_KMDA); + ADD_NDS32_BUILTIN2 ("kmxda", integer, unsigned, unsigned, KMXDA); + ADD_NDS32_BUILTIN2 ("v_kmxda", integer, v2hi, v2hi, V_KMXDA); + ADD_NDS32_BUILTIN2 ("smds", integer, unsigned, unsigned, SMDS); + ADD_NDS32_BUILTIN2 ("v_smds", integer, v2hi, v2hi, V_SMDS); + ADD_NDS32_BUILTIN2 ("smdrs", integer, unsigned, unsigned, SMDRS); + ADD_NDS32_BUILTIN2 ("v_smdrs", integer, v2hi, v2hi, V_SMDRS); + ADD_NDS32_BUILTIN2 ("smxds", integer, unsigned, unsigned, SMXDS); + ADD_NDS32_BUILTIN2 ("v_smxds", integer, v2hi, v2hi, V_SMXDS); + ADD_NDS32_BUILTIN3 ("kmabb", integer, integer, unsigned, unsigned, KMABB); + ADD_NDS32_BUILTIN3 ("v_kmabb", integer, integer, v2hi, v2hi, V_KMABB); + ADD_NDS32_BUILTIN3 ("kmabt", integer, integer, unsigned, unsigned, KMABT); + ADD_NDS32_BUILTIN3 ("v_kmabt", integer, integer, v2hi, v2hi, V_KMABT); + ADD_NDS32_BUILTIN3 ("kmatt", integer, integer, unsigned, unsigned, KMATT); + ADD_NDS32_BUILTIN3 ("v_kmatt", integer, integer, v2hi, v2hi, V_KMATT); + ADD_NDS32_BUILTIN3 ("kmada", integer, integer, unsigned, unsigned, KMADA); + ADD_NDS32_BUILTIN3 ("v_kmada", integer, integer, v2hi, v2hi, V_KMADA); + ADD_NDS32_BUILTIN3 ("kmaxda", integer, integer, unsigned, unsigned, KMAXDA); + ADD_NDS32_BUILTIN3 ("v_kmaxda", integer, integer, v2hi, v2hi, V_KMAXDA); + ADD_NDS32_BUILTIN3 ("kmads", integer, integer, unsigned, unsigned, KMADS); + ADD_NDS32_BUILTIN3 ("v_kmads", integer, integer, v2hi, v2hi, V_KMADS); + ADD_NDS32_BUILTIN3 ("kmadrs", integer, integer, 
unsigned, unsigned, KMADRS); + ADD_NDS32_BUILTIN3 ("v_kmadrs", integer, integer, v2hi, v2hi, V_KMADRS); + ADD_NDS32_BUILTIN3 ("kmaxds", integer, integer, unsigned, unsigned, KMAXDS); + ADD_NDS32_BUILTIN3 ("v_kmaxds", integer, integer, v2hi, v2hi, V_KMAXDS); + ADD_NDS32_BUILTIN3 ("kmsda", integer, integer, unsigned, unsigned, KMSDA); + ADD_NDS32_BUILTIN3 ("v_kmsda", integer, integer, v2hi, v2hi, V_KMSDA); + ADD_NDS32_BUILTIN3 ("kmsxda", integer, integer, unsigned, unsigned, KMSXDA); + ADD_NDS32_BUILTIN3 ("v_kmsxda", integer, integer, v2hi, v2hi, V_KMSXDA); + + /* DSP Extension: Signed 16bit Multiply with 64bit ADD/Subtract. */ + ADD_NDS32_BUILTIN2 ("smal", long_long_integer, + long_long_integer, unsigned, SMAL); + ADD_NDS32_BUILTIN2 ("v_smal", long_long_integer, + long_long_integer, v2hi, V_SMAL); + + /* DSP Extension: 32bit MISC. */ + ADD_NDS32_BUILTIN2 ("bitrev", unsigned, unsigned, unsigned, BITREV); + ADD_NDS32_BUILTIN2 ("wext", unsigned, long_long_integer, unsigned, WEXT); + ADD_NDS32_BUILTIN3 ("bpick", unsigned, unsigned, unsigned, unsigned, BPICK); + ADD_NDS32_BUILTIN3 ("insb", unsigned, unsigned, unsigned, unsigned, INSB); + + /* DSP Extension: 64bit Add and Subtract. 
*/ + ADD_NDS32_BUILTIN2 ("sadd64", long_long_integer, + long_long_integer, long_long_integer, SADD64); + ADD_NDS32_BUILTIN2 ("uadd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UADD64); + ADD_NDS32_BUILTIN2 ("radd64", long_long_integer, + long_long_integer, long_long_integer, RADD64); + ADD_NDS32_BUILTIN2 ("uradd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, URADD64); + ADD_NDS32_BUILTIN2 ("kadd64", long_long_integer, + long_long_integer, long_long_integer, KADD64); + ADD_NDS32_BUILTIN2 ("ukadd64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UKADD64); + ADD_NDS32_BUILTIN2 ("ssub64", long_long_integer, + long_long_integer, long_long_integer, SSUB64); + ADD_NDS32_BUILTIN2 ("usub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, USUB64); + ADD_NDS32_BUILTIN2 ("rsub64", long_long_integer, + long_long_integer, long_long_integer, RSUB64); + ADD_NDS32_BUILTIN2 ("ursub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, URSUB64); + ADD_NDS32_BUILTIN2 ("ksub64", long_long_integer, + long_long_integer, long_long_integer, KSUB64); + ADD_NDS32_BUILTIN2 ("uksub64", long_long_unsigned, + long_long_unsigned, long_long_unsigned, UKSUB64); + + /* DSP Extension: 32bit Multiply with 64bit Add/Subtract. 
*/ + ADD_NDS32_BUILTIN3 ("smar64", long_long_integer, + long_long_integer, integer, integer, SMAR64); + ADD_NDS32_BUILTIN3 ("smsr64", long_long_integer, + long_long_integer, integer, integer, SMSR64); + ADD_NDS32_BUILTIN3 ("umar64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UMAR64); + ADD_NDS32_BUILTIN3 ("umsr64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UMSR64); + ADD_NDS32_BUILTIN3 ("kmar64", long_long_integer, + long_long_integer, integer, integer, KMAR64); + ADD_NDS32_BUILTIN3 ("kmsr64", long_long_integer, + long_long_integer, integer, integer, KMSR64); + ADD_NDS32_BUILTIN3 ("ukmar64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UKMAR64); + ADD_NDS32_BUILTIN3 ("ukmsr64", long_long_unsigned, + long_long_unsigned, unsigned, unsigned, UKMSR64); + + /* DSP Extension: Signed 16bit Multiply with 64bit Add/Subtract. */ + ADD_NDS32_BUILTIN3 ("smalbb", long_long_integer, + long_long_integer, unsigned, unsigned, SMALBB); + ADD_NDS32_BUILTIN3 ("v_smalbb", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALBB); + ADD_NDS32_BUILTIN3 ("smalbt", long_long_integer, + long_long_integer, unsigned, unsigned, SMALBT); + ADD_NDS32_BUILTIN3 ("v_smalbt", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALBT); + ADD_NDS32_BUILTIN3 ("smaltt", long_long_integer, + long_long_integer, unsigned, unsigned, SMALTT); + ADD_NDS32_BUILTIN3 ("v_smaltt", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALTT); + ADD_NDS32_BUILTIN3 ("smalda", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDA); + ADD_NDS32_BUILTIN3 ("v_smalda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDA); + ADD_NDS32_BUILTIN3 ("smalxda", long_long_integer, + long_long_integer, unsigned, unsigned, SMALXDA); + ADD_NDS32_BUILTIN3 ("v_smalxda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALXDA); + ADD_NDS32_BUILTIN3 ("smalds", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDS); + 
ADD_NDS32_BUILTIN3 ("v_smalds", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDS); + ADD_NDS32_BUILTIN3 ("smaldrs", long_long_integer, + long_long_integer, unsigned, unsigned, SMALDRS); + ADD_NDS32_BUILTIN3 ("v_smaldrs", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALDRS); + ADD_NDS32_BUILTIN3 ("smalxds", long_long_integer, + long_long_integer, unsigned, unsigned, SMALXDS); + ADD_NDS32_BUILTIN3 ("v_smalxds", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMALXDS); + ADD_NDS32_BUILTIN3 ("smslda", long_long_integer, + long_long_integer, unsigned, unsigned, SMSLDA); + ADD_NDS32_BUILTIN3 ("v_smslda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMSLDA); + ADD_NDS32_BUILTIN3 ("smslxda", long_long_integer, + long_long_integer, unsigned, unsigned, SMSLXDA); + ADD_NDS32_BUILTIN3 ("v_smslxda", long_long_integer, + long_long_integer, v2hi, v2hi, V_SMSLXDA); + + /* DSP Extension: augmented baseline. */ + ADD_NDS32_BUILTIN2 ("uclip32", unsigned, integer, unsigned, UCLIP32); + ADD_NDS32_BUILTIN2 ("sclip32", integer, integer, unsigned, SCLIP32); + ADD_NDS32_BUILTIN1 ("kabs", integer, integer, KABS); + + /* DSP Extension: vector type unaligned Load/Store */ + ADD_NDS32_BUILTIN1 ("get_unaligned_u16x2", u_v2hi, ptr_ushort, UALOAD_U16); + ADD_NDS32_BUILTIN1 ("get_unaligned_s16x2", v2hi, ptr_short, UALOAD_S16); + ADD_NDS32_BUILTIN1 ("get_unaligned_u8x4", u_v4qi, ptr_uchar, UALOAD_U8); + ADD_NDS32_BUILTIN1 ("get_unaligned_s8x4", v4qi, ptr_char, UALOAD_S8); + ADD_NDS32_BUILTIN2 ("put_unaligned_u16x2", void, ptr_ushort, + u_v2hi, UASTORE_U16); + ADD_NDS32_BUILTIN2 ("put_unaligned_s16x2", void, ptr_short, + v2hi, UASTORE_S16); + ADD_NDS32_BUILTIN2 ("put_unaligned_u8x4", void, ptr_uchar, + u_v4qi, UASTORE_U8); + ADD_NDS32_BUILTIN2 ("put_unaligned_s8x4", void, ptr_char, + v4qi, UASTORE_S8); } diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md index 24e7c0b..c70a6fc 100644 --- a/gcc/config/nds32/nds32-intrinsic.md 
+++ b/gcc/config/nds32/nds32-intrinsic.md @@ -1037,6 +1037,187 @@ (set_attr "length" "4")] ) +;; SATURATION + +(define_insn "unspec_kaddw" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "kaddw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubw" + [(set (match_operand:SI 0 "register_operand" "=r") + (ss_minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))] + "" + "ksubw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kaddh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KADDH))] + "" + "kaddh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSUBH))] + "" + "ksubh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kaddh_dsp" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 15)] UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + "kaddh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_ksubh_dsp" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 15)] UNSPEC_CLIPS))] + "NDS32_EXT_DSP_P ()" + "ksubh\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmbb" + [(set (match_operand:V2HI 0 "register_operand" "=r") 
+ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBB))] + "" + "kdmbb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmbt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMBT))] + "" + "kdmbt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmtb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTB))] + "" + "kdmtb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kdmtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KDMTT))] + "" + "kdmtt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmbb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBB))] + "" + "khmbb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmbt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMBT))] + "" + "khmbt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_khmtb" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTB))] + "" + "khmtb\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + 
+(define_insn "unspec_khmtt" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "r") + (match_operand:V2HI 2 "register_operand" "r")] UNSPEC_KHMTT))] + "" + "khmtt\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "length" "4")] +) + +(define_insn "unspec_kslraw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAW))] + "" + "kslraw\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_kslrawu" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] UNSPEC_KSLRAWU))] + "" + "kslraw.u\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_rdov" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_RDOV))] + "" + "rdov\t%0" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_clrov" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_CLROV)] + "" + "clrov" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + ;; System (define_insn "unspec_sva" @@ -1415,22 +1596,17 @@ if (TARGET_ISA_V3M) nds32_expand_unaligned_store (operands, DImode); else - emit_insn (gen_unaligned_store_dw (operands[0], operands[1])); + emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]), + operands[1])); DONE; }) (define_insn "unaligned_store_dw" - [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) - (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] + [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw") + (unspec:DI [(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))] "" { - rtx otherops[3]; - otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); - 
otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); - otherops[2] = operands[0]; - - output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); - return ""; + return nds32_output_smw_double_word (operands); } [(set_attr "type" "store") (set_attr "length" "4")] @@ -1495,4 +1671,15 @@ DONE; }) +;; abs alias kabs + +(define_insn "unspec_kabs" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_KABS))] + "" + "kabs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + ;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c index 2c3aac7..db67a0e 100644 --- a/gcc/config/nds32/nds32-isr.c +++ b/gcc/config/nds32/nds32-isr.c @@ -43,7 +43,286 @@ We use an array to record essential information for each vector. */ static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS]; -/* ------------------------------------------------------------------------ */ +/* ------------------------------------------------------------- */ +/* FIXME: + FOR BACKWARD COMPATIBILITY, we need to support following patterns: + + __attribute__((interrupt("XXX;YYY;id=ZZZ"))) + __attribute__((exception("XXX;YYY;id=ZZZ"))) + __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) + + We provide several functions to parse the strings. */ + +/* Parse ORIGINAL_STR, the legacy string argument of an interrupt attribute + on function FUNC_NAME, and record the save-reg type, nested type, and + security level S_LEVEL in nds32_isr_vectors[] for each listed id. + A missing 'id' is diagnosed and nothing is recorded; an out-of-range + id is diagnosed and skipped. */ +static void +nds32_interrupt_attribute_parse_string (const char *original_str, + const char *func_name, + unsigned int s_level) +{ + char target_str[100]; + enum nds32_isr_save_reg save_reg; + enum nds32_isr_nested_type nested_type; + + char *save_all_regs_str, *save_caller_regs_str; + char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; + char *id_str, *value_str; + + /* Copy original string into a character array so that the string APIs + can handle it. The copy is bounded; an over-long string is truncated. */ + snprintf (target_str, sizeof (target_str), "%s", original_str); + + /* 1. 
Detect 'save_all_regs' : NDS32_SAVE_ALL + 'save_caller_regs' : NDS32_PARTIAL_SAVE */ + save_all_regs_str = strstr (target_str, "save_all_regs"); + save_caller_regs_str = strstr (target_str, "save_caller_regs"); + + /* Note that if no argument is found, + use NDS32_PARTIAL_SAVE by default. */ + if (save_all_regs_str) + save_reg = NDS32_SAVE_ALL; + else if (save_caller_regs_str) + save_reg = NDS32_PARTIAL_SAVE; + else + save_reg = NDS32_PARTIAL_SAVE; + + /* 2. Detect 'nested' : NDS32_NESTED + 'not_nested' : NDS32_NOT_NESTED + 'ready_nested' : NDS32_NESTED_READY + 'critical' : NDS32_CRITICAL */ + nested_str = strstr (target_str, "nested"); + not_nested_str = strstr (target_str, "not_nested"); + ready_nested_str = strstr (target_str, "ready_nested"); + critical_str = strstr (target_str, "critical"); + + /* Note that if no argument is found, + use NDS32_NOT_NESTED by default. + Also, since 'not_nested' and 'ready_nested' both contains + 'nested' string, we check 'nested' with lowest priority. */ + if (not_nested_str) + nested_type = NDS32_NOT_NESTED; + else if (ready_nested_str) + nested_type = NDS32_NESTED_READY; + else if (nested_str) + nested_type = NDS32_NESTED; + else if (critical_str) + nested_type = NDS32_CRITICAL; + else + nested_type = NDS32_NOT_NESTED; + + /* 3. Traverse each id value and set corresponding information. */ + id_str = strstr (target_str, "id="); + + /* If user forgets to assign 'id', issue an error message. */ + if (id_str == NULL) + { + error ("require id argument in the string"); + return; + } + /* Extract the value_str first. */ + id_str = strtok (id_str, "="); + value_str = strtok (NULL, ";"); + + /* Pick up the first id value token. */ + value_str = strtok (value_str, ","); + while (value_str != NULL) + { + int i; + i = atoi (value_str); + + /* For interrupt(0..63), the actual vector number is (9..72). */ + i = i + 9; + if (i < 9 || i > 72) + error ("invalid id value for interrupt attribute"); + else + { + /* Setup nds32_isr_vectors[] array only for a valid id. 
*/ + nds32_isr_vectors[i].category = NDS32_ISR_INTERRUPT; + strcpy (nds32_isr_vectors[i].func_name, func_name); + nds32_isr_vectors[i].save_reg = save_reg; + nds32_isr_vectors[i].nested_type = nested_type; + nds32_isr_vectors[i].security_level = s_level; + } + + /* Fetch next token. */ + value_str = strtok (NULL, ","); + } + + return; +} + +/* Parse ORIGINAL_STR, the legacy string argument of an exception attribute + on function FUNC_NAME, and record the save-reg type, nested type, and + security level S_LEVEL in nds32_isr_vectors[] for each listed id. + A missing 'id' is diagnosed and nothing is recorded; an out-of-range + id is diagnosed and skipped. */ +static void +nds32_exception_attribute_parse_string (const char *original_str, + const char *func_name, + unsigned int s_level) +{ + char target_str[100]; + enum nds32_isr_save_reg save_reg; + enum nds32_isr_nested_type nested_type; + + char *save_all_regs_str, *save_caller_regs_str; + char *nested_str, *not_nested_str, *ready_nested_str, *critical_str; + char *id_str, *value_str; + + /* Copy original string into a character array so that the string APIs + can handle it. The copy is bounded; an over-long string is truncated. */ + snprintf (target_str, sizeof (target_str), "%s", original_str); + + /* 1. Detect 'save_all_regs' : NDS32_SAVE_ALL + 'save_caller_regs' : NDS32_PARTIAL_SAVE */ + save_all_regs_str = strstr (target_str, "save_all_regs"); + save_caller_regs_str = strstr (target_str, "save_caller_regs"); + + /* Note that if no argument is found, + use NDS32_PARTIAL_SAVE by default. */ + if (save_all_regs_str) + save_reg = NDS32_SAVE_ALL; + else if (save_caller_regs_str) + save_reg = NDS32_PARTIAL_SAVE; + else + save_reg = NDS32_PARTIAL_SAVE; + + /* 2. 
Detect 'nested' : NDS32_NESTED + 'not_nested' : NDS32_NOT_NESTED + 'ready_nested' : NDS32_NESTED_READY + 'critical' : NDS32_CRITICAL */ + nested_str = strstr (target_str, "nested"); + not_nested_str = strstr (target_str, "not_nested"); + ready_nested_str = strstr (target_str, "ready_nested"); + critical_str = strstr (target_str, "critical"); + + /* Note that if no argument is found, + use NDS32_NOT_NESTED by default. + Also, since 'not_nested' and 'ready_nested' both contains + 'nested' string, we check 'nested' with lowest priority. 
*/ + if (not_nested_str) + nested_type = NDS32_NOT_NESTED; + else if (ready_nested_str) + nested_type = NDS32_NESTED_READY; + else if (nested_str) + nested_type = NDS32_NESTED; + else if (critical_str) + nested_type = NDS32_CRITICAL; + else + nested_type = NDS32_NOT_NESTED; + + /* 3. Traverse each id value and set corresponding information. */ + id_str = strstr (target_str, "id="); + + /* If user forgets to assign 'id', issue an error message. */ + if (id_str == NULL) + { + error ("require id argument in the string"); + return; + } + /* Extract the value_str first. */ + id_str = strtok (id_str, "="); + value_str = strtok (NULL, ";"); + + /* Pick up the first id value token. */ + value_str = strtok (value_str, ","); + while (value_str != NULL) + { + int i; + i = atoi (value_str); + + /* For exception(1..8), the actual vector number is (1..8). */ + if (i < 1 || i > 8) + error ("invalid id value for exception attribute"); + else + { + /* Setup nds32_isr_vectors[] array only for a valid id. */ + nds32_isr_vectors[i].category = NDS32_ISR_EXCEPTION; + strcpy (nds32_isr_vectors[i].func_name, func_name); + nds32_isr_vectors[i].save_reg = save_reg; + nds32_isr_vectors[i].nested_type = nested_type; + nds32_isr_vectors[i].security_level = s_level; + } + + /* Fetch next token. */ + value_str = strtok (NULL, ","); + } + + return; +} + +/* Parse ORIGINAL_STR, the legacy string argument of a reset attribute on + function FUNC_NAME, and record the total number of vectors and the + optional nmi/warm handler names in nds32_isr_vectors[0]. A key that + has no value after '=' is ignored. */ +static void +nds32_reset_attribute_parse_string (const char *original_str, + const char *func_name) +{ + char target_str[100]; + char *vectors_str, *nmi_str, *warm_str, *value_str; + + /* Deal with reset attribute. Its vector number is always 0. */ + nds32_isr_vectors[0].category = NDS32_ISR_RESET; + + + /* 1. Parse 'vectors=XXXX'. 
*/ + + /* Copy original string into a character array so that the string APIs + can handle it. The copy is bounded; an over-long string is truncated. */ + snprintf (target_str, sizeof (target_str), "%s", original_str); + vectors_str = strstr (target_str, "vectors="); + /* The total vectors = interrupt + exception numbers + reset. + There are 8 exception and 1 reset in nds32 architecture. 
+ If user forgets to assign 'vectors', use default 16 interrupts. */ + nds32_isr_vectors[0].total_n_vectors = 16 + 8 + 1; + if (vectors_str != NULL) + { + /* Extract the value_str; it is NULL when nothing follows '='. */ + vectors_str = strtok (vectors_str, "="); + value_str = strtok (NULL, ";"); + if (value_str != NULL) + nds32_isr_vectors[0].total_n_vectors = atoi (value_str) + 8 + 1; + } + strcpy (nds32_isr_vectors[0].func_name, func_name); + + + /* 2. Parse 'nmi_func=YYYY'. */ + + /* Copy original string into a character array so that the string APIs + can handle it. The copy is bounded; an over-long string is truncated. */ + snprintf (target_str, sizeof (target_str), "%s", original_str); + nmi_str = strstr (target_str, "nmi_func="); + if (nmi_str != NULL) + { + /* Extract the value_str. */ + nmi_str = strtok (nmi_str, "="); + value_str = strtok (NULL, ";"); + if (value_str != NULL) + strcpy (nds32_isr_vectors[0].nmi_name, value_str); + } + + /* 3. Parse 'warm_func=ZZZZ'. */ + + /* Copy original string into a character array so that the string APIs + can handle it. The copy is bounded; an over-long string is truncated. */ + snprintf (target_str, sizeof (target_str), "%s", original_str); + warm_str = strstr (target_str, "warm_func="); + if (warm_str != NULL) + { + /* Extract the value_str. */ + warm_str = strtok (warm_str, "="); + value_str = strtok (NULL, ";"); + if (value_str != NULL) + strcpy (nds32_isr_vectors[0].warm_name, value_str); + } + + return; +} +/* ------------------------------------------------------------- */ /* A helper function to emit section head template. */ static void @@ -79,6 +332,15 @@ nds32_emit_isr_jmptbl_section (int vector_id) char section_name[100]; char symbol_name[100]; + /* A critical isr does not need jump table section because + its behavior is not performed by two-level handler. */ + if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) + { + fprintf (asm_out_file, "\t! 
The vector %02d is a critical isr !\n", + vector_id); + return; + } + /* Prepare jmptbl section and symbol name. 
*/ snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.%02d", vector_id); @@ -99,7 +361,6 @@ nds32_emit_isr_vector_section (int vector_id) const char *c_str = "CATEGORY"; const char *sr_str = "SR"; const char *nt_str = "NT"; - const char *vs_str = "VS"; char first_level_handler_name[100]; char section_name[100]; char symbol_name[100]; @@ -147,30 +408,47 @@ nds32_emit_isr_vector_section (int vector_id) case NDS32_NESTED_READY: nt_str = "nr"; break; + case NDS32_CRITICAL: + /* The critical isr is not performed by two-level handler. */ + nt_str = ""; + break; } - /* Currently we have 4-byte or 16-byte size for each vector. - If it is 4-byte, the first level handler name has suffix string "_4b". */ - vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; - /* Now we can create first level handler name. */ - snprintf (first_level_handler_name, sizeof (first_level_handler_name), - "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str); + if (nds32_isr_vectors[vector_id].security_level == 0) + { + /* For security level 0, use normal first level handler name. */ + snprintf (first_level_handler_name, sizeof (first_level_handler_name), + "_nds32_%s_%s_%s", c_str, sr_str, nt_str); + } + else + { + /* For security level 1-3, use corresponding spl_1, spl_2, or spl_3. */ + snprintf (first_level_handler_name, sizeof (first_level_handler_name), + "_nds32_spl_%d", nds32_isr_vectors[vector_id].security_level); + } /* Prepare vector section and symbol name. */ snprintf (section_name, sizeof (section_name), ".nds32_vector.%02d", vector_id); snprintf (symbol_name, sizeof (symbol_name), - "_nds32_vector_%02d%s", vector_id, vs_str); + "_nds32_vector_%02d", vector_id); /* Everything is ready. We can start emit vector section content. */ nds32_emit_section_head_template (section_name, symbol_name, floor_log2 (nds32_isr_vector_size), false); - /* According to the vector size, the instructions in the - vector section may be different. 
*/ - if (nds32_isr_vector_size == 4) + /* First we check if it is a critical isr. + If so, jump to user handler directly; otherwise, the instructions + in the vector section may be different according to the vector size. */ + if (nds32_isr_vectors[vector_id].nested_type == NDS32_CRITICAL) + { + /* This block is for critical isr. Jump to user handler directly. */ + fprintf (asm_out_file, "\tj\t%s ! jump to user handler directly\n", + nds32_isr_vectors[vector_id].func_name); + } + else if (nds32_isr_vector_size == 4) { /* This block is for 4-byte vector size. Hardware $VID support is necessary and only one instruction @@ -239,13 +517,11 @@ nds32_emit_isr_reset_content (void) { unsigned int i; unsigned int total_n_vectors; - const char *vs_str; char reset_handler_name[100]; char section_name[100]; char symbol_name[100]; total_n_vectors = nds32_isr_vectors[0].total_n_vectors; - vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n"); @@ -261,7 +537,7 @@ nds32_emit_isr_reset_content (void) /* Emit vector references. */ fprintf (asm_out_file, "\t ! references to vector section entries\n"); for (i = 0; i < total_n_vectors; i++) - fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str); + fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d\n", i); /* Emit jmptbl_00 section. */ snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00"); @@ -275,9 +551,9 @@ nds32_emit_isr_reset_content (void) /* Emit vector_00 section. */ snprintf (section_name, sizeof (section_name), ".nds32_vector.00"); - snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00"); snprintf (reset_handler_name, sizeof (reset_handler_name), - "_nds32_reset%s", vs_str); + "_nds32_reset"); fprintf (asm_out_file, "\t! 
....................................\n"); nds32_emit_section_head_template (section_name, symbol_name, @@ -323,12 +599,12 @@ void nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) { int save_all_p, partial_save_p; - int nested_p, not_nested_p, nested_ready_p; + int nested_p, not_nested_p, nested_ready_p, critical_p; int intr_p, excp_p, reset_p; /* Initialize variables. */ save_all_p = partial_save_p = 0; - nested_p = not_nested_p = nested_ready_p = 0; + nested_p = not_nested_p = nested_ready_p = critical_p = 0; intr_p = excp_p = reset_p = 0; /* We must check at MOST one attribute to set save-reg. */ @@ -347,8 +623,10 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) not_nested_p = 1; if (lookup_attribute ("nested_ready", func_attrs)) nested_ready_p = 1; + if (lookup_attribute ("critical", func_attrs)) + critical_p = 1; - if ((nested_p + not_nested_p + nested_ready_p) > 1) + if ((nested_p + not_nested_p + nested_ready_p + critical_p) > 1) error ("multiple nested types attributes to function %qD", func_decl); /* We must check at MOST one attribute to @@ -362,6 +640,17 @@ nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) if ((intr_p + excp_p + reset_p) > 1) error ("multiple interrupt attributes to function %qD", func_decl); + + /* Do not allow isr attributes under linux toolchain. */ + if (TARGET_LINUX_ABI && intr_p) + error ("cannot use interrupt attributes to function %qD " + "under linux toolchain", func_decl); + if (TARGET_LINUX_ABI && excp_p) + error ("cannot use exception attributes to function %qD " + "under linux toolchain", func_decl); + if (TARGET_LINUX_ABI && reset_p) + error ("cannot use reset attributes to function %qD " + "under linux toolchain", func_decl); } /* Function to construct isr vectors information array. 
@@ -373,15 +662,21 @@ nds32_construct_isr_vectors_information (tree func_attrs, const char *func_name) { tree save_all, partial_save; - tree nested, not_nested, nested_ready; + tree nested, not_nested, nested_ready, critical; tree intr, excp, reset; + tree secure; + tree security_level_list; + tree security_level; + unsigned int s_level; + save_all = lookup_attribute ("save_all", func_attrs); partial_save = lookup_attribute ("partial_save", func_attrs); nested = lookup_attribute ("nested", func_attrs); not_nested = lookup_attribute ("not_nested", func_attrs); nested_ready = lookup_attribute ("nested_ready", func_attrs); + critical = lookup_attribute ("critical", func_attrs); intr = lookup_attribute ("interrupt", func_attrs); excp = lookup_attribute ("exception", func_attrs); @@ -391,6 +686,63 @@ nds32_construct_isr_vectors_information (tree func_attrs, if (!intr && !excp && !reset) return; + /* At first, we need to retrieve security level. */ + secure = lookup_attribute ("secure", func_attrs); + if (secure != NULL) + { + security_level_list = TREE_VALUE (secure); + security_level = TREE_VALUE (security_level_list); + s_level = TREE_INT_CST_LOW (security_level); + } + else + { + /* If there is no secure attribute, the security level is set by + nds32_isr_secure_level, which is controlled by -misr-secure=X option. + By default nds32_isr_secure_level should be 0. */ + s_level = nds32_isr_secure_level; + } + + /* ------------------------------------------------------------- */ + /* FIXME: + FOR BACKWARD COMPATIBILITY, we need to support following patterns: + + __attribute__((interrupt("XXX;YYY;id=ZZZ"))) + __attribute__((exception("XXX;YYY;id=ZZZ"))) + __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) + + If interrupt/exception/reset appears and its argument is a + STRING_CST, we will parse string with some auxiliary functions + which set necessary isr information in the nds32_isr_vectors[] array. 
+ After that, we can return immediately to avoid new-syntax isr + information construction. */ + if (intr != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) + { + tree string_arg = TREE_VALUE (TREE_VALUE (intr)); + nds32_interrupt_attribute_parse_string (TREE_STRING_POINTER (string_arg), + func_name, + s_level); + return; + } + if (excp != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) + { + tree string_arg = TREE_VALUE (TREE_VALUE (excp)); + nds32_exception_attribute_parse_string (TREE_STRING_POINTER (string_arg), + func_name, + s_level); + return; + } + if (reset != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) + { + tree string_arg = TREE_VALUE (TREE_VALUE (reset)); + nds32_reset_attribute_parse_string (TREE_STRING_POINTER (string_arg), + func_name); + return; + } + /* ------------------------------------------------------------- */ + /* If we are here, either we have interrupt/exception, or reset attribute. */ if (intr || excp) @@ -417,6 +769,9 @@ nds32_construct_isr_vectors_information (tree func_attrs, /* Add vector_number_offset to get actual vector number. */ vector_id = TREE_INT_CST_LOW (id) + vector_number_offset; + /* Set security level. */ + nds32_isr_vectors[vector_id].security_level = s_level; + /* Enable corresponding vector and set function name. */ nds32_isr_vectors[vector_id].category = (intr) ? (NDS32_ISR_INTERRUPT) @@ -436,6 +791,8 @@ nds32_construct_isr_vectors_information (tree func_attrs, nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED; else if (nested_ready) nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY; + else if (critical) + nds32_isr_vectors[vector_id].nested_type = NDS32_CRITICAL; /* Advance to next id. */ id_list = TREE_CHAIN (id_list); @@ -492,7 +849,6 @@ nds32_construct_isr_vectors_information (tree func_attrs, } } -/* A helper function to handle isr stuff at the beginning of asm file. 
*/ void nds32_asm_file_start_for_isr (void) { @@ -505,15 +861,14 @@ nds32_asm_file_start_for_isr (void) strcpy (nds32_isr_vectors[i].func_name, ""); nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE; nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED; + nds32_isr_vectors[i].security_level = 0; nds32_isr_vectors[i].total_n_vectors = 0; strcpy (nds32_isr_vectors[i].nmi_name, ""); strcpy (nds32_isr_vectors[i].warm_name, ""); } } -/* A helper function to handle isr stuff at the end of asm file. */ -void -nds32_asm_file_end_for_isr (void) +void nds32_asm_file_end_for_isr (void) { int i; @@ -547,6 +902,8 @@ nds32_asm_file_end_for_isr (void) /* Found one vector which is interupt or exception. Output its jmptbl and vector section content. */ fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i); + fprintf (asm_out_file, "\t! security level: %d\n", + nds32_isr_vectors[i].security_level); fprintf (asm_out_file, "\t! ------------------------------------\n"); nds32_emit_isr_jmptbl_section (i); fprintf (asm_out_file, "\t! ....................................\n"); @@ -580,4 +937,65 @@ nds32_isr_function_p (tree func) || (t_reset != NULL_TREE)); } -/* ------------------------------------------------------------------------ */ +/* Return true if FUNC is an isr function with critical attribute. */ +bool +nds32_isr_function_critical_p (tree func) +{ + tree t_intr; + tree t_excp; + tree t_critical; + + tree attrs; + + if (TREE_CODE (func) != FUNCTION_DECL) + abort (); + + attrs = DECL_ATTRIBUTES (func); + + t_intr = lookup_attribute ("interrupt", attrs); + t_excp = lookup_attribute ("exception", attrs); + + t_critical = lookup_attribute ("critical", attrs); + + /* If both interrupt and exception attribute does not appear, + we can return false immediately. */ + if ((t_intr == NULL_TREE) && (t_excp == NULL_TREE)) + return false; + + /* Here we can guarantee either interrupt or exception attribute + does exist, so further check critical attribute. 
+ If it also appears, we can return true. */ + if (t_critical != NULL_TREE) + return true; + + /* ------------------------------------------------------------- */ + /* FIXME: + FOR BACKWARD COMPATIBILITY, we need to handle string type. + If the string 'critical' appears in the interrupt/exception + string argument, we can return true. */ + if (t_intr != NULL_TREE || t_excp != NULL_TREE) + { + const char *critical_str; + tree t_check; + tree string_arg; + + t_check = t_intr ? t_intr : t_excp; + if (TREE_CODE (TREE_VALUE (TREE_VALUE (t_check))) == STRING_CST) + { + /* Search the attribute string in place: strstr does not modify + it, so no copy into a fixed-size buffer is needed. */ + string_arg = TREE_VALUE (TREE_VALUE (t_check)); + critical_str = strstr (TREE_STRING_POINTER (string_arg), "critical"); + + /* Found 'critical' string, so return true. */ + if (critical_str) + return true; + } + } + /* ------------------------------------------------------------- */ + + /* Other cases, this isr function is not critical type. */ + return false; +} + +/* ------------------------------------------------------------- */ diff --git a/gcc/config/nds32/nds32-linux.opt b/gcc/config/nds32/nds32-linux.opt new file mode 100644 index 0000000..75ccd76 --- /dev/null +++ b/gcc/config/nds32/nds32-linux.opt @@ -0,0 +1,16 @@ +mcmodel= +Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) +Specify the address generation strategy for code model. 
+ +Enum +Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) +Known cmodel types (for use with the -mcmodel= option): + +EnumValue +Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) + +EnumValue +Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) + +EnumValue +Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c index 720e85a..3040bde 100644 --- a/gcc/config/nds32/nds32-md-auxiliary.c +++ b/gcc/config/nds32/nds32-md-auxiliary.c @@ -39,6 +39,9 @@ #include "expr.h" #include "emit-rtl.h" #include "explow.h" +#include "stringpool.h" +#include "attribs.h" + /* ------------------------------------------------------------------------ */ @@ -261,6 +264,118 @@ output_cond_branch_compare_zero (int code, const char *suffix, output_asm_insn (pattern, operands); } +static void +nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p) +{ + rtx src_high_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + src_high_part = nds32_di_high_part_subreg (src); + dst_low_part = nds32_di_low_part_subreg (dst); + + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + if (logic_shift_p) + { + emit_insn (gen_uwext (dst_low_part, src, + shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + else + { + emit_insn (gen_wext (dst_low_part, src, + shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + if (logic_shift_p) + { + emit_insn (gen_lshrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part, const0_rtx); + } + else + { + emit_insn (gen_ashrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + GEN_INT (31))); + } + } + } + else + { + rtx 
dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f))); + + if (logic_shift_p) + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + dst_high_part = 0 + */ + emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part_g32, const0_rtx); + } + else + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + # shift 31 for sign extend + dst_high_part = src_high_part >> 31 + */ + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part, + GEN_INT (31))); + } + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + /* 
------------------------------------------------------------------------ */ /* Auxiliary function for expand RTL pattern. */ @@ -1195,8 +1310,166 @@ nds32_emit_v3pop_fpr_callee_saved (int base) } } +enum nds32_expand_result_type +nds32_expand_extv (rtx *operands) +{ + gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3])); + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + rtx dst = operands[0]; + rtx src = operands[1]; + + if (MEM_P (src) + && width == 32 + && (bitpos % BITS_PER_UNIT) == 0 + && GET_MODE_BITSIZE (GET_MODE (dst)) == width) + { + rtx newmem = adjust_address (src, GET_MODE (dst), + bitpos / BITS_PER_UNIT); + + rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); + + emit_insn (gen_unaligned_loadsi (dst, base_addr)); + + return EXPAND_DONE; + } + return EXPAND_FAIL; +} + +enum nds32_expand_result_type +nds32_expand_insv (rtx *operands) +{ + gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2])); + HOST_WIDE_INT width = INTVAL (operands[1]); + HOST_WIDE_INT bitpos = INTVAL (operands[2]); + rtx dst = operands[0]; + rtx src = operands[3]; + + if (MEM_P (dst) + && width == 32 + && (bitpos % BITS_PER_UNIT) == 0 + && GET_MODE_BITSIZE (GET_MODE (src)) == width) + { + rtx newmem = adjust_address (dst, GET_MODE (src), + bitpos / BITS_PER_UNIT); + + rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); + + emit_insn (gen_unaligned_storesi (base_addr, src)); + + return EXPAND_DONE; + } + return EXPAND_FAIL; +} + /* ------------------------------------------------------------------------ */ +/* Function to generate PC relative jump table. + Refer to nds32.md for more details. + + The following is the sample for the case that diff value + can be presented in '.short' size. + + addi $r1, $r1, -(case_lower_bound) + slti $ta, $r1, (case_number) + beqz $ta, .L_skip_label + + la $ta, .L35 ! get jump table address + lh $r1, [$ta + $r1 << 1] ! 
load symbol diff from jump table entry + addi $ta, $r1, $ta + jr5 $ta + + ! jump table entry + L35: + .short .L25-.L35 + .short .L26-.L35 + .short .L27-.L35 + .short .L28-.L35 + .short .L29-.L35 + .short .L30-.L35 + .short .L31-.L35 + .short .L32-.L35 + .short .L33-.L35 + .short .L34-.L35 */ +const char * +nds32_output_casesi_pc_relative (rtx *operands) +{ + machine_mode mode; + rtx diff_vec; + + diff_vec = PATTERN (NEXT_INSN (as_a (operands[1]))); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + /* Step C: "t <-- operands[1]". */ + if (flag_pic) + { + output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); + output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); + output_asm_insn ("add\t$ta, $ta, $gp", operands); + } + else + output_asm_insn ("la\t$ta, %l1", operands); + + /* Get the mode of each element in the difference vector. */ + mode = GET_MODE (diff_vec); + + /* Step D: "z <-- (mem (plus (operands[0] << m) t))", + where m is 0, 1, or 2 to load address-diff value from table. */ + switch (mode) + { + case E_QImode: + output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); + break; + case E_HImode: + output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); + break; + case E_SImode: + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + break; + default: + gcc_unreachable (); + } + + /* Step E: "t <-- z + t". + Add table label_ref with address-diff value to + obtain target case address. */ + output_asm_insn ("add\t$ta, %2, $ta", operands); + + /* Step F: jump to target with register t. */ + if (TARGET_16_BIT) + return "jr5\t$ta"; + else + return "jr\t$ta"; +} + +/* Function to generate normal jump table. */ +const char * +nds32_output_casesi (rtx *operands) +{ + /* Step C: "t <-- operands[1]". 
*/ + if (flag_pic) + { + output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); + output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); + output_asm_insn ("add\t$ta, $ta, $gp", operands); + } + else + output_asm_insn ("la\t$ta, %l1", operands); + + /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + + /* No need to perform Step E, which is only used for + pc relative jump table. */ + + /* Step F: jump to target with register z. */ + if (TARGET_16_BIT) + return "jr5\t%2"; + else + return "jr\t%2"; +} + /* Function to return memory format. */ enum nds32_16bit_address_type nds32_mem_format (rtx op) @@ -1757,11 +2030,8 @@ nds32_output_stack_push (rtx par_rtx) /* If we step here, we are going to do v3push or multiple push operation. */ - /* The v3push/v3pop instruction should only be applied on - none-isr and none-variadic function. */ - if (TARGET_V3PUSH - && !nds32_isr_function_p (current_function_decl) - && (cfun->machine->va_args_size == 0)) + /* Refer to nds32.h, where we comment when push25/pop25 are available. */ + if (NDS32_V3PUSH_AVAILABLE_P) { /* For stack v3push: operands[0]: Re @@ -1881,11 +2151,8 @@ nds32_output_stack_pop (rtx par_rtx ATTRIBUTE_UNUSED) /* If we step here, we are going to do v3pop or multiple pop operation. */ - /* The v3push/v3pop instruction should only be applied on - none-isr and none-variadic function. */ - if (TARGET_V3PUSH - && !nds32_isr_function_p (current_function_decl) - && (cfun->machine->va_args_size == 0)) + /* Refer to nds32.h, where we comment when push25/pop25 are available. */ + if (NDS32_V3PUSH_AVAILABLE_P) { /* For stack v3pop: operands[0]: Re @@ -2022,77 +2289,6 @@ nds32_output_return (void) return ""; } -/* Function to generate PC relative jump table. - Refer to nds32.md for more details. - - The following is the sample for the case that diff value - can be presented in '.short' size. 
- - addi $r1, $r1, -(case_lower_bound) - slti $ta, $r1, (case_number) - beqz $ta, .L_skip_label - - la $ta, .L35 ! get jump table address - lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry - addi $ta, $r1, $ta - jr5 $ta - - ! jump table entry - L35: - .short .L25-.L35 - .short .L26-.L35 - .short .L27-.L35 - .short .L28-.L35 - .short .L29-.L35 - .short .L30-.L35 - .short .L31-.L35 - .short .L32-.L35 - .short .L33-.L35 - .short .L34-.L35 */ -const char * -nds32_output_casesi_pc_relative (rtx *operands) -{ - machine_mode mode; - rtx diff_vec; - - diff_vec = PATTERN (NEXT_INSN (as_a (operands[1]))); - - gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); - - /* Step C: "t <-- operands[1]". */ - output_asm_insn ("la\t$ta, %l1", operands); - - /* Get the mode of each element in the difference vector. */ - mode = GET_MODE (diff_vec); - - /* Step D: "z <-- (mem (plus (operands[0] << m) t))", - where m is 0, 1, or 2 to load address-diff value from table. */ - switch (mode) - { - case E_QImode: - output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); - break; - case E_HImode: - output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); - break; - case E_SImode: - output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); - break; - default: - gcc_unreachable (); - } - - /* Step E: "t <-- z + t". - Add table label_ref with address-diff value to - obtain target case address. */ - output_asm_insn ("add\t$ta, %2, $ta", operands); - - /* Step F: jump to target with register t. */ - if (TARGET_16_BIT) - return "jr5\t$ta"; - else - return "jr\t$ta"; -} /* output a float load instruction */ const char * @@ -2250,52 +2446,51 @@ nds32_output_float_store (rtx *operands) return ""; } -/* Function to generate normal jump table. */ const char * -nds32_output_casesi (rtx *operands) +nds32_output_smw_single_word (rtx *operands) { - /* Step C: "t <-- operands[1]". */ - output_asm_insn ("la\t$ta, %l1", operands); - - /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". 
*/ - output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); - - /* No need to perform Step E, which is only used for - pc relative jump table. */ + char buff[100]; + unsigned regno; + int enable4; + bool update_base_p; + rtx base_addr = operands[0]; + rtx base_reg; + rtx otherops[2]; - /* Step F: jump to target with register z. */ - if (TARGET_16_BIT) - return "jr5\t%2"; + if (REG_P (XEXP (base_addr, 0))) + { + update_base_p = false; + base_reg = XEXP (base_addr, 0); + } else - return "jr\t%2"; -} + { + update_base_p = true; + base_reg = XEXP (XEXP (base_addr, 0), 0); + } -/* Auxiliary functions for lwm/smw. */ -bool -nds32_valid_smw_lwm_base_p (rtx op) -{ - rtx base_addr; + const char *update_base = update_base_p ? "m" : ""; - if (!MEM_P (op)) - return false; + regno = REGNO (operands[1]); - base_addr = XEXP (op, 0); + otherops[0] = base_reg; + otherops[1] = operands[1]; - if (REG_P (base_addr)) - return true; + if (regno >= 28) + { + enable4 = nds32_regno_to_enable4 (regno); + sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); + } else { - if (GET_CODE (base_addr) == POST_INC - && REG_P (XEXP (base_addr, 0))) - return true; + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); } - - return false; + output_asm_insn (buff, otherops); + return ""; } /* ------------------------------------------------------------------------ */ const char * -nds32_output_smw_single_word (rtx *operands) +nds32_output_smw_double_word (rtx *operands) { char buff[100]; unsigned regno; @@ -2303,7 +2498,7 @@ nds32_output_smw_single_word (rtx *operands) bool update_base_p; rtx base_addr = operands[0]; rtx base_reg; - rtx otherops[2]; + rtx otherops[3]; if (REG_P (XEXP (base_addr, 0))) { @@ -2322,15 +2517,22 @@ nds32_output_smw_single_word (rtx *operands) otherops[0] = base_reg; otherops[1] = operands[1]; + otherops[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); if (regno >= 28) { - enable4 = nds32_regno_to_enable4 
(regno) + | nds32_regno_to_enable4 (regno + 1); sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); } + else if (regno == 27) + { + enable4 = nds32_regno_to_enable4 (regno + 1); + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1, %x", update_base, enable4); + } else { - sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%2", update_base); } output_asm_insn (buff, otherops); return ""; @@ -2415,16 +2617,17 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) if (mode == DImode) { /* Load doubleword, we need two registers to access. */ - reg[0] = simplify_gen_subreg (SImode, operands[0], - GET_MODE (operands[0]), 0); - reg[1] = simplify_gen_subreg (SImode, operands[0], - GET_MODE (operands[0]), 4); + reg[0] = nds32_di_low_part_subreg (operands[0]); + reg[1] = nds32_di_high_part_subreg (operands[0]); /* A register only store 4 byte. */ width = GET_MODE_SIZE (SImode) - 1; } else { - reg[0] = operands[0]; + if (VECTOR_MODE_P (mode)) + reg[0] = gen_reg_rtx (SImode); + else + reg[0] = operands[0]; } for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) @@ -2466,6 +2669,8 @@ nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) offset = offset + offset_adj; } } + if (VECTOR_MODE_P (mode)) + convert_move (operands[0], reg[0], false); } void @@ -2499,16 +2704,20 @@ nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode) if (mode == DImode) { /* Load doubleword, we need two registers to access. */ - reg[0] = simplify_gen_subreg (SImode, operands[1], - GET_MODE (operands[1]), 0); - reg[1] = simplify_gen_subreg (SImode, operands[1], - GET_MODE (operands[1]), 4); + reg[0] = nds32_di_low_part_subreg (operands[1]); + reg[1] = nds32_di_high_part_subreg (operands[1]); /* A register only store 4 byte. 
*/ width = GET_MODE_SIZE (SImode) - 1; } else { - reg[0] = operands[1]; + if (VECTOR_MODE_P (mode)) + { + reg[0] = gen_reg_rtx (SImode); + convert_move (reg[0], operands[1], false); + } + else + reg[0] = operands[1]; } for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) @@ -2765,6 +2974,36 @@ nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands) return ""; } +const char * +nds32_output_unpkd8 (rtx output, rtx input, + rtx high_idx_rtx, rtx low_idx_rtx, + bool signed_p) +{ + char pattern[100]; + rtx output_operands[2]; + HOST_WIDE_INT high_idx, low_idx; + high_idx = INTVAL (high_idx_rtx); + low_idx = INTVAL (low_idx_rtx); + + gcc_assert (high_idx >= 0 && high_idx <= 3); + gcc_assert (low_idx >= 0 && low_idx <= 3); + + /* We only have 10, 20, 30 and 31. */ + if ((low_idx != 0 || high_idx == 0) && + !(low_idx == 1 && high_idx == 3)) + return "#"; + + char sign_char = signed_p ? 's' : 'z'; + + sprintf (pattern, + "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1", + sign_char, high_idx, low_idx); + output_operands[0] = output; + output_operands[1] = input; + output_asm_insn (pattern, output_operands); + return ""; +} + /* Return true if SYMBOL_REF X binds locally. 
*/ static bool @@ -2782,22 +3021,15 @@ nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call, char pattern[100]; bool noreturn_p; - if (GET_CODE (symbol) == CONST) - { - symbol= XEXP (symbol, 0); - - if (GET_CODE (symbol) == PLUS) - symbol = XEXP (symbol, 0); - } - - gcc_assert (GET_CODE (symbol) == SYMBOL_REF - || REG_P (symbol)); - if (nds32_long_call_p (symbol)) strcpy (pattern, long_call); else strcpy (pattern, call); + if (flag_pic && CONSTANT_P (symbol) + && !nds32_symbol_binds_local_p (symbol)) + strcat (pattern, "@PLT"); + if (align_p) strcat (pattern, "\n\t.align 2"); @@ -2815,6 +3047,91 @@ nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call, return ""; } +bool +nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + /* smds or smdrs. */ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + return false; + + /* smxds. */ + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + return false; + + return true; +} + +const char * +nds32_output_sms (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + if (nds32_need_split_sms_p (in0_idx0, in1_idx0, + in0_idx1, in1_idx1)) + return "#"; + /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */ + + /* smds or smdrs. 
*/ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + { + if (INTVAL (in0_idx0) == 0) + { + if (TARGET_BIG_ENDIAN) + return "smds\t%0, %1, %2"; + else + return "smdrs\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smdrs\t%0, %1, %2"; + else + return "smds\t%0, %1, %2"; + } + } + + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + { + if (INTVAL (in0_idx0) == 1) + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %2, %1"; + else + return "smxds\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %1, %2"; + else + return "smxds\t%0, %2, %1"; + } + } + + gcc_unreachable (); + return ""; +} + +void +nds32_split_sms (rtx out, rtx in0, rtx in1, + rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, in0, in1, + in0_idx0, in1_idx0)); + emit_insn (gen_mulhisi3v (result1, in0, in1, + in0_idx1, in1_idx1)); + emit_insn (gen_subsi3 (out, result0, result1)); +} + /* Spilt a doubleword instrucion to two single word instructions. */ void nds32_spilt_doubleword (rtx *operands, bool load_p) @@ -2924,11 +3241,516 @@ nds32_spilt_doubleword (rtx *operands, bool load_p) } } +void +nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx src_high_part, src_low_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + /* We need to handle shift more than 32 bit!!!! 
*/ + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + rtx ext_start; + ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode); + + emit_insn (gen_wext (dst_high_part, src, ext_start)); + emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount)); + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + emit_insn (gen_ashlsi3 (dst_high_part, src_low_part, + new_shift_amout)); + + emit_move_insn (dst_low_part, GEN_INT (0)); + } + } + else + { + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + rtx ext_start; + ext_start = gen_reg_rtx (SImode); + + /* + if (shiftamount < 32) + dst_low_part = src_low_part << shiftamout + dst_high_part = wext (src, 32 - shiftamount) + # wext can't handle wext (src, 32) since it's only take rb[0:4] + # for extract. + dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part + else + dst_low_part = 0 + dst_high_part = src_low_part << shiftamount & 0x1f + */ + + emit_insn (gen_subsi3 (ext_start, + gen_int_mode (32, SImode), + shiftamount)); + emit_insn (gen_wext (dst_high_part_l32, src, ext_start)); + + /* Handle for shiftamout == 0. 
*/ + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount)); + + emit_move_insn (dst_low_part_g32, const0_rtx); + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part, + new_shift_amout)); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + +void +nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, false); +} + +void +nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, true); +} + +void +nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx select_reg, low5bit, low5bit_inv, minus32sa; + rtx dst_low_part_g32_tmph; + rtx dst_low_part_g32_tmpl; + rtx dst_high_part_l32_tmph; + rtx dst_high_part_l32_tmpl; + + rtx src_low_part, src_high_part; + rtx dst_high_part, dst_low_part; + + shiftamount = force_reg (SImode, shiftamount); + + emit_insn (gen_andsi3 (shiftamount, + shiftamount, + gen_int_mode (0x3f, SImode))); + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + low5bit = gen_reg_rtx (SImode); + low5bit_inv = gen_reg_rtx (SImode); + minus32sa = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + dst_low_part_g32_tmph 
= gen_reg_rtx (SImode); + dst_low_part_g32_tmpl = gen_reg_rtx (SImode); + + dst_high_part_l32_tmph = gen_reg_rtx (SImode); + dst_high_part_l32_tmpl = gen_reg_rtx (SImode); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + /* if shiftamount < 32 + dst_low_part = wext(src, shiftamount) + else + dst_low_part = ((src_high_part >> (shiftamount & 0x1f)) + | (src_low_part << (32 - (shiftamount & 0x1f)))) + */ + emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode))); + emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit)); + + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + + emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit)); + emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv)); + + emit_insn (gen_iorsi3 (dst_low_part_g32, + dst_low_part_g32_tmpl, + dst_low_part_g32_tmph)); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + + /* if shiftamount < 32 + dst_high_part = ((src_high_part >> shiftamount) + | (src_low_part << (32 - shiftamount))) + dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part + else + dst_high_part = wext(src, shiftamount & 0x1f) + */ + + emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount)); + + emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount)); + emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa)); + + emit_insn (gen_iorsi3 (dst_high_part_l32, + dst_high_part_l32_tmpl, + dst_high_part_l32_tmph)); + + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_wext (dst_high_part_g32, src, low5bit)); + + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); +} + +/* Return true if OP contains a symbol reference. 
*/ +bool +symbolic_reference_mentioned_p (rtx op) +{ + const char *fmt; + int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Expand PIC code for @GOTOFF and @GOT. + + Example for @GOTOFF: + + la $r0, symbol@GOTOFF + -> sethi $ta, hi20(symbol@GOTOFF) + ori $ta, $ta, lo12(symbol@GOTOFF) + add $r0, $ta, $gp + + Example for @GOT: + + la $r0, symbol@GOT + -> sethi $ta, hi20(symbol@GOT) + ori $ta, $ta, lo12(symbol@GOT) + lw $r0, [$ta + $gp] +*/ +rtx +nds32_legitimize_pic_address (rtx x) +{ + rtx addr = x; + rtx reg = gen_reg_rtx (Pmode); + rtx pat; + + if (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (x) + || SYMBOL_REF_LOCAL_P (x)))) + { + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); + } + else if (GET_CODE (x) == SYMBOL_REF) + { + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + + x = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + reg)); + } + else if (GET_CODE (x) == CONST) + { + /* We don't split constant in expand_pic_move because GOTOFF can combine + the addend with the symbol. 
*/ + addr = XEXP (x, 0); + gcc_assert (GET_CODE (addr) == PLUS); + + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); + pat = gen_rtx_PLUS (Pmode, pat, op1); + pat = gen_rtx_CONST (Pmode, pat); + emit_insn (gen_sethi (reg, pat)); + emit_insn (gen_lo_sum (reg, reg, pat)); + x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); + } + else if (GET_CODE (op0) == SYMBOL_REF + && GET_CODE (op1) == CONST_INT) + { + /* This is a constant offset from a @GOT symbol reference. */ + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + addr = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + reg)); + emit_move_insn (reg, addr); + if (satisfies_constraint_Is15 (op1)) + x = gen_rtx_PLUS (Pmode, reg, op1); + else + { + rtx tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (tmp_reg, op1)); + x = gen_rtx_PLUS (Pmode, reg, tmp_reg); + } + } + else + { + /* Don't handle this pattern. */ + debug_rtx (x); + gcc_unreachable (); + } + } + return x; +} + +void +nds32_expand_pic_move (rtx *operands) +{ + rtx src; + + src = nds32_legitimize_pic_address (operands[1]); + emit_move_insn (operands[0], src); +} + +/* Expand ICT symbol. 
+ Example for @ICT and ICT model=large: + + la $r0, symbol@ICT + -> sethi $rt, hi20(symbol@ICT) + lwi $r0, [$rt + lo12(symbol@ICT)] + +*/ +rtx +nds32_legitimize_ict_address (rtx x) +{ + rtx symbol = x; + rtx addr = x; + rtx reg = gen_reg_rtx (Pmode); + gcc_assert (GET_CODE (x) == SYMBOL_REF + && nds32_indirect_call_referenced_p (x)); + + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, symbol), UNSPEC_ICT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + + x = gen_const_mem (SImode, gen_rtx_LO_SUM (Pmode, reg, addr)); + + return x; +} + +void +nds32_expand_ict_move (rtx *operands) +{ + rtx src = operands[1]; + + src = nds32_legitimize_ict_address (src); + + emit_move_insn (operands[0], src); +} + +/* Return true X is a indirect call symbol. */ +bool +nds32_indirect_call_referenced_p (rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_ICT) + x = XVECEXP (x, 0, 0); + + if (GET_CODE (x) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (x); + + return decl + && (lookup_attribute("indirect_call", + DECL_ATTRIBUTES(decl)) + != NULL); + } + + return false; +} + /* Return true X is need use long call. */ bool nds32_long_call_p (rtx symbol) { - return TARGET_CMODEL_LARGE; + if (nds32_indirect_call_referenced_p (symbol)) + return TARGET_ICT_MODEL_LARGE; + else + return TARGET_CMODEL_LARGE; +} + +/* Return true if X contains a thread-local symbol. */ +bool +nds32_tls_referenced_p (rtx x) +{ + if (!targetm.have_tls) + return false; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + x = XEXP (XEXP (x, 0), 0); + + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) + return true; + + return false; +} + +/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute + this (thread-local) address. 
*/ +rtx +nds32_legitimize_tls_address (rtx x) +{ + rtx tmp_reg; + rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM); + rtx pat, insns, reg0; + + if (GET_CODE (x) == SYMBOL_REF) + switch (SYMBOL_REF_TLS_MODEL (x)) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + /* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill + may destroy the define-use chain anylysis to insert relax_hint. */ + if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC) + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD); + else + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD); + + pat = gen_rtx_CONST (SImode, pat); + reg0 = gen_rtx_REG (Pmode, 0); + /* If we can confirm all clobber reigsters, it doesn't have to use call + instruction. */ + insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx); + RTL_CONST_CALL_P (insns) = 1; + tmp_reg = gen_reg_rtx (SImode); + emit_move_insn (tmp_reg, reg0); + x = tmp_reg; + break; + + case TLS_MODEL_INITIAL_EXEC: + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE); + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0))); + if (flag_pic) + emit_use (pic_offset_table_rtx); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + break; + + case TLS_MODEL_LOCAL_EXEC: + /* Expand symbol_ref@TPOFF': + sethi $ta, hi20(symbol_ref@TPOFF) + ori $ta, $ta, lo12(symbol_ref@TPOFF) + add $r0, $ta, $tp */ + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + break; + + default: + gcc_unreachable (); + } + else if (GET_CODE (x) == CONST) + { + rtx base, addend; + split_const (x, &base, &addend); + + if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) + { + 
/* Expand symbol_ref@TPOFF': + sethi $ta, hi20(symbol_ref@TPOFF + addend) + ori $ta, $ta, lo12(symbol_ref@TPOFF + addend) + add $r0, $ta, $tp */ + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, base), UNSPEC_TLSLE); + pat = gen_rtx_PLUS (SImode, pat, addend); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + } + } + + return x; +} + +void +nds32_expand_tls_move (rtx *operands) +{ + rtx src = operands[1]; + rtx base, addend; + + if (CONSTANT_P (src)) + split_const (src, &base, &addend); + + if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) + src = nds32_legitimize_tls_address (src); + else + { + src = nds32_legitimize_tls_address (base); + if (addend != const0_rtx) + { + src = gen_rtx_PLUS (SImode, src, addend); + src = force_operand (src, operands[0]); + } + } + + emit_move_insn (operands[0], src); } void @@ -2976,3 +3798,105 @@ nds32_expand_constant (machine_mode mode, HOST_WIDE_INT val, emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp)); } } + +/* Auxiliary functions for lwm/smw. */ +bool +nds32_valid_smw_lwm_base_p (rtx op) +{ + rtx base_addr; + + if (!MEM_P (op)) + return false; + + base_addr = XEXP (op, 0); + + if (REG_P (base_addr)) + return true; + else + { + if (GET_CODE (base_addr) == POST_INC + && REG_P (XEXP (base_addr, 0))) + return true; + } + + return false; +} + +/* Auxiliary functions for manipulation DI mode. 
*/ +rtx nds32_di_high_part_subreg(rtx reg) +{ + unsigned high_part_offset = subreg_highpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, high_part_offset); +} + +rtx nds32_di_low_part_subreg(rtx reg) +{ + unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, low_part_offset); +} + +/* ------------------------------------------------------------------------ */ + +/* Auxiliary function for output TLS patterns. */ + +const char * +nds32_output_tls_desc (rtx *operands) +{ + char pattern[1000]; + + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t" + ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t" + ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t" + ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t" + ".relax_hint %%1\n\tjral $r15"); + else + snprintf (pattern, sizeof (pattern), + "sethi $r0, hi20(%%0)\n\t" + "ori $r0, $r0, lo12(%%0)\n\t" + "lw $r15, [$r0 + $gp]\n\t" + "add $r0, $r0, $gp\n\t" + "jral $r15"); + output_asm_insn (pattern, operands); + return ""; +} + +const char * +nds32_output_tls_ie (rtx *operands) +{ + char pattern[1000]; + + if (flag_pic) + { + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" + ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t" + ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]"); + else + snprintf (pattern, sizeof (pattern), + "sethi %%0, hi20(%%1)\n\t" + "ori %%0, %%0, lo12(%%1)\n\t" + "lw %%0, [%%0 + $gp]"); + } + else + { + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" + ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]"); + else + snprintf (pattern, sizeof (pattern), + "sethi %%0, hi20(%%1)\n\t" + "lwi %%0, [%%0 + lo12(%%1)]"); + } + output_asm_insn (pattern, operands); + return ""; +} diff --git a/gcc/config/nds32/nds32-memory-manipulation.c 
b/gcc/config/nds32/nds32-memory-manipulation.c index 8dea130..f6140e6 100644 --- a/gcc/config/nds32/nds32-memory-manipulation.c +++ b/gcc/config/nds32/nds32-memory-manipulation.c @@ -257,8 +257,124 @@ static bool nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, rtx size, rtx alignment) { - return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, - size, alignment); + rtx dst_base_reg, src_base_reg; + rtx dst_itr, src_itr; + rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; + rtx dst_end; + rtx double_word_mode_loop, byte_mode_loop; + rtx tmp; + int start_regno; + bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; + unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); + + if (TARGET_ISA_V3M && !align_to_4_bytes) + return 0; + + if (TARGET_REDUCED_REGS) + start_regno = 2; + else + start_regno = 16; + + dst_itr = gen_reg_rtx (Pmode); + src_itr = gen_reg_rtx (Pmode); + dst_end = gen_reg_rtx (Pmode); + tmp = gen_reg_rtx (QImode); + + double_word_mode_loop = gen_label_rtx (); + byte_mode_loop = gen_label_rtx (); + + dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); + src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); + + if (total_bytes < 8) + { + /* Emit total_bytes less than 8 loop version of movmem. + add $dst_end, $dst, $size + move $dst_itr, $dst + .Lbyte_mode_loop: + lbi.bi $tmp, [$src_itr], #1 + sbi.bi $tmp, [$dst_itr], #1 + ! Not readch upper bound. Loop. + bne $dst_itr, $dst_end, .Lbyte_mode_loop */ + + /* add $dst_end, $dst, $size */ + dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, + NULL_RTX, 0, OPTAB_WIDEN); + /* move $dst_itr, $dst + move $src_itr, $src */ + emit_move_insn (dst_itr, dst_base_reg); + emit_move_insn (src_itr, src_base_reg); + + /* .Lbyte_mode_loop: */ + emit_label (byte_mode_loop); + + /* lbi.bi $tmp, [$src_itr], #1 */ + nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); + + /* sbi.bi $tmp, [$dst_itr], #1 */ + nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); + /* ! 
Not readch upper bound. Loop. + bne $dst_itr, $dst_end, .Lbyte_mode_loop */ + emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, + SImode, 1, byte_mode_loop); + return true; + } + else if (total_bytes % 8 == 0) + { + /* Emit multiple of 8 loop version of movmem. + + add $dst_end, $dst, $size + move $dst_itr, $dst + move $src_itr, $src + + .Ldouble_word_mode_loop: + lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr + smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr + ! move will delete after register allocation + move $src_itr, $src_itr' + move $dst_itr, $dst_itr' + ! Not readch upper bound. Loop. + bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ + + /* add $dst_end, $dst, $size */ + dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, + NULL_RTX, 0, OPTAB_WIDEN); + + /* move $dst_itr, $dst + move $src_itr, $src */ + emit_move_insn (dst_itr, dst_base_reg); + emit_move_insn (src_itr, src_base_reg); + + /* .Ldouble_word_mode_loop: */ + emit_label (double_word_mode_loop); + /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr + smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ + src_itr_m = src_itr; + dst_itr_m = dst_itr; + srcmem_m = srcmem; + dstmem_m = dstmem; + nds32_emit_mem_move_block (start_regno, 2, + &dst_itr_m, &dstmem_m, + &src_itr_m, &srcmem_m, + true); + /* move $src_itr, $src_itr' + move $dst_itr, $dst_itr' */ + emit_move_insn (dst_itr, dst_itr_m); + emit_move_insn (src_itr, src_itr_m); + + /* ! Not readch upper bound. Loop. + bne $double_word_end, $dst_itr, .Ldouble_word_mode_loop */ + emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, + Pmode, 1, double_word_mode_loop); + } + else + { + /* Handle size greater than 8, and not a multiple of 8. 
*/ + return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, + size, alignment); + } + + return true; } static bool @@ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) /* Auxiliary function for expand setmem pattern. */ static rtx -nds32_gen_dup_4_byte_to_word_value (rtx value) +nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) { - rtx value4word = gen_reg_rtx (SImode); - gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); if (CONST_INT_P (value)) @@ -449,36 +563,74 @@ nds32_gen_dup_4_byte_to_word_value (rtx value) } else { - /* ! prepare word - andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab - slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 - or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab - slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 - or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ - - rtx tmp1, tmp2, tmp3, tmp4, final_value; - tmp1 = expand_binop (SImode, and_optab, value, - gen_int_mode (0xff, SImode), - NULL_RTX, 0, OPTAB_WIDEN); - tmp2 = expand_binop (SImode, ashl_optab, tmp1, - gen_int_mode (8, SImode), - NULL_RTX, 0, OPTAB_WIDEN); - tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, - NULL_RTX, 0, OPTAB_WIDEN); - tmp4 = expand_binop (SImode, ashl_optab, tmp3, - gen_int_mode (16, SImode), - NULL_RTX, 0, OPTAB_WIDEN); - - final_value = expand_binop (SImode, ior_optab, tmp3, tmp4, - NULL_RTX, 0, OPTAB_WIDEN); - emit_move_insn (value4word, final_value); + if (NDS32_EXT_DSP_P ()) + { + /* ! prepare word + insb $tmp, $value, 1 ! $tmp <- 0x0000abab + pkbb16 $tmp6, $tmp2, $tmp2 ! $value4word <- 0xabababab */ + rtx tmp = gen_reg_rtx (SImode); + + convert_move (tmp, value, true); + + emit_insn ( + gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp)); + + emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp)); + } + else + { + /* ! prepare word + andi $tmp1, $value, 0xff ! $tmp1 <- 0x000000ab + slli $tmp2, $tmp1, 8 ! $tmp2 <- 0x0000ab00 + or $tmp3, $tmp1, $tmp2 ! 
$tmp3 <- 0x0000abab + slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 + or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */ + + rtx tmp1, tmp2, tmp3, tmp4; + tmp1 = expand_binop (SImode, and_optab, value, + gen_int_mode (0xff, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + tmp2 = expand_binop (SImode, ashl_optab, tmp1, + gen_int_mode (8, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2, + NULL_RTX, 0, OPTAB_WIDEN); + tmp4 = expand_binop (SImode, ashl_optab, tmp3, + gen_int_mode (16, SImode), + NULL_RTX, 0, OPTAB_WIDEN); + + emit_insn (gen_iorsi3 (value4word, tmp3, tmp4)); + } } return value4word; } static rtx -emit_setmem_word_loop (rtx itr, rtx size, rtx value) +nds32_gen_dup_4_byte_to_word_value (rtx value) +{ + rtx value4word = gen_reg_rtx (SImode); + nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); + + return value4word; +} + +static rtx +nds32_gen_dup_8_byte_to_double_word_value (rtx value) +{ + rtx value4doubleword = gen_reg_rtx (DImode); + + nds32_gen_dup_4_byte_to_word_value_aux ( + value, nds32_di_low_part_subreg(value4doubleword)); + + emit_move_insn (nds32_di_high_part_subreg(value4doubleword), + nds32_di_low_part_subreg(value4doubleword)); + return value4doubleword; +} + + +static rtx +emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) { rtx word_mode_label = gen_label_rtx (); rtx word_mode_end_label = gen_label_rtx (); @@ -487,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) rtx word_mode_end = gen_reg_rtx (SImode); rtx size_for_word = gen_reg_rtx (SImode); - /* and $size_for_word, $size, #~3 */ + /* and $size_for_word, $size, #~0x7 */ size_for_word = expand_binop (SImode, and_optab, size, - gen_int_mode (~3, SImode), + gen_int_mode (~0x7, SImode), NULL_RTX, 0, OPTAB_WIDEN); emit_move_insn (byte_mode_size, size); @@ -501,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, NULL_RTX, 0, OPTAB_WIDEN); - 
/* andi $byte_mode_size, $size, 3 */ - byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3), + /* andi $byte_mode_size, $size, 0x7 */ + byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), NULL_RTX, 0, OPTAB_WIDEN); emit_move_insn (byte_mode_size, byte_mode_size_tmp); @@ -512,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) /* ! word-mode set loop smw.bim $value4word, [$dst_itr], $value4word, 0 bne $word_mode_end, $dst_itr, .Lword_mode */ - emit_insn (gen_unaligned_store_update_base_w (itr, - itr, - value)); + emit_insn (gen_unaligned_store_update_base_dw (itr, + itr, + value)); emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, Pmode, 1, word_mode_label); @@ -566,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) static bool nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) { - rtx value4word; + rtx value4doubleword; rtx value4byte; rtx dst; rtx byte_mode_size; @@ -609,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000 or $val4word, $tmp3, $tmp4 ! 
$value4word <- 0xabababab */ - value4word = nds32_gen_dup_4_byte_to_word_value (value); + value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); /* and $size_for_word, $size, #-4 beqz $size_for_word, .Lword_mode_end @@ -622,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) smw.bim $value4word, [$dst], $value4word, 0 bne $word_mode_end, $dst, .Lword_mode .Lword_mode_end: */ - byte_mode_size = emit_setmem_word_loop (dst, size, value4word); + byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); /* beqz $byte_mode_size, .Lend add $byte_mode_end, $dst, $byte_mode_size @@ -633,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) bne $byte_mode_end, $dst, .Lbyte_mode .Lend: */ - value4byte = simplify_gen_subreg (QImode, value4word, SImode, - subreg_lowpart_offset (QImode, SImode)); + value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, + subreg_lowpart_offset (QImode, DImode)); emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); @@ -651,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) rtx byte_loop_size = gen_reg_rtx (SImode); rtx remain_size = gen_reg_rtx (SImode); rtx new_base_reg; - rtx value4byte, value4word; + rtx value4byte, value4doubleword; rtx byte_mode_size; rtx last_byte_loop_label = gen_label_rtx (); size = force_reg (SImode, size); - value4word = nds32_gen_dup_4_byte_to_word_value (value); - value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0); + value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); + value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, + subreg_lowpart_offset (QImode, DImode)); emit_move_insn (byte_loop_size, size); emit_move_insn (byte_loop_base, base_reg); @@ -686,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); /* Set memory word by word. 
*/ - byte_mode_size = emit_setmem_word_loop (new_base_reg, - remain_size, - value4word); + byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, + remain_size, + value4doubleword); emit_move_insn (byte_loop_base, new_base_reg); emit_move_insn (byte_loop_size, byte_mode_size); diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md index a8f7717..80746b1 100644 --- a/gcc/config/nds32/nds32-multiple.md +++ b/gcc/config/nds32/nds32-multiple.md @@ -2854,6 +2854,25 @@ (set_attr "length" "4")] ) +(define_expand "unaligned_store_update_base_dw" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8))) + (set (mem:DI (match_dup 1)) + (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])] + "" +{ + /* DO NOT emit unaligned_store_w_m immediately since web pass don't + recognize post_inc, try it again after GCC 5.0. + REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */ + emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode))); + DONE; +} + [(set_attr "type" "store_multiple") + (set_attr "combo" "2") + (set_attr "length" "4")] +) + (define_insn "*stmsi25" [(match_parallel 0 "nds32_store_multiple_operation" [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) diff --git a/gcc/config/nds32/nds32-n10.md b/gcc/config/nds32/nds32-n10.md new file mode 100644 index 0000000..0dd76da --- /dev/null +++ b/gcc/config/nds32/nds32-n10.md @@ -0,0 +1,439 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------------------ +;; Define N10 pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n10_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; IF - Instruction Fetch +;; II - Instruction Issue / Instruction Decode +;; EX - Instruction Execution +;; MM - Memory Execution +;; WB - Instruction Retire / Result Write-Back + +(define_cpu_unit "n10_ii" "nds32_n10_machine") +(define_cpu_unit "n10_ex" "nds32_n10_machine") +(define_cpu_unit "n10_mm" "nds32_n10_machine") +(define_cpu_unit "n10_wb" "nds32_n10_machine") +(define_cpu_unit "n10f_iq" "nds32_n10_machine") +(define_cpu_unit "n10f_rf" "nds32_n10_machine") +(define_cpu_unit "n10f_e1" "nds32_n10_machine") +(define_cpu_unit "n10f_e2" "nds32_n10_machine") +(define_cpu_unit "n10f_e3" "nds32_n10_machine") +(define_cpu_unit "n10f_e4" "nds32_n10_machine") + +(define_insn_reservation "nds_n10_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + 
+(define_insn_reservation "nds_n10_mmu" 1 + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_alu_shift" 1 + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_pbsad" 1 + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex*3, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_pbsada" 1 + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex*3, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_load" 1 + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_store" 1 + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_1" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1"))) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_2" 1 + (and (eq_attr "pipeline_model" "n10") + (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)"))) + "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_3" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_4" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr 
"combo" "4"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_5" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_6" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_7" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_load_multiple_N" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "load_multiple") + (match_test "get_attr_combo (insn) >= 8"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_1" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1"))) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_2" 1 + (and (eq_attr "pipeline_model" "n10") + (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)"))) + "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_3" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3"))) + "n10_ii, n10_ii+n10_ex, 
n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_4" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_5" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_6" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_7" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_store_multiple_N" 1 + (and (eq_attr "pipeline_model" "n10") + (and (eq_attr "type" "store_multiple") + (match_test "get_attr_combo (insn) >= 8"))) + "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb") + +(define_insn_reservation "nds_n10_mul" 1 + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_mac" 1 + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex*34, n10_mm, n10_wb") + 
+(define_insn_reservation "nds_n10_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_alu" 1 + (and (eq_attr "type" "dalu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_alu64" 1 + (and (eq_attr "type" "dalu64") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_alu_round" 1 + (and (eq_attr "type" "daluround") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_cmp" 1 + (and (eq_attr "type" "dcmp") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_clip" 1 + (and (eq_attr "type" "dclip") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_mul" 1 + (and (eq_attr "type" "dmul") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_mac" 1 + (and (eq_attr "type" "dmac") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_insb" 1 + (and (eq_attr "type" "dinsb") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_pack" 1 + (and (eq_attr "type" "dpack") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_bpick" 1 + (and (eq_attr "type" "dbpick") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_dsp_wext" 1 + (and (eq_attr "type" "dwext") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ex, n10_mm, n10_wb") + +(define_insn_reservation "nds_n10_fpu_alu" 4 + (and (eq_attr "type" "falu") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, 
n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_muls" 4 + (and (eq_attr "type" "fmuls") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_muld" 4 + (and (eq_attr "type" "fmuld") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_macs" 4 + (and (eq_attr "type" "fmacs") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_macd" 4 + (and (eq_attr "type" "fmacd") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*4, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_divs" 4 + (and (ior (eq_attr "type" "fdivs") + (eq_attr "type" "fsqrts")) + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_divd" 4 + (and (ior (eq_attr "type" "fdivd") + (eq_attr "type" "fsqrtd")) + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fast_alu" 2 + (and (ior (eq_attr "type" "fcmp") + (ior (eq_attr "type" "fabs") + (ior (eq_attr "type" "fcpy") + (eq_attr "type" "fcmov")))) + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmtsr" 4 + (and (eq_attr "type" "fmtsr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmtdr" 4 + (and (eq_attr "type" "fmtdr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmfsr" 2 + (and (eq_attr "type" "fmfsr") + (eq_attr 
"pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_fmfdr" 2 + (and (eq_attr "type" "fmfdr") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_load" 3 + (and (eq_attr "type" "fload") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +(define_insn_reservation "nds_n10_fpu_store" 1 + (and (eq_attr "type" "fstore") + (eq_attr "pipeline_model" "n10")) + "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD +;; Load data from the memory and produce the loaded data. The result is +;; ready at MM. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at MM. +;; MUL, MAC +;; Compute data in the multiply-adder and produce the data. The result +;; is ready at MM. +;; DIV +;; Compute data in the divider and produce the data. The result is ready +;; at MM. +;; +;; Consumers (RHS) +;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU +;; Require operands at EX. +;; ALU_SHIFT_Rb +;; An ALU-SHIFT instruction consists of a shift micro-operation followed +;; by an arithmetic micro-operation. The operand Rb is used by the first +;; micro-operation, and there are some latencies if data dependency occurs. +;; MAC_RaRb +;; A MAC instruction does multiplication at EX and does accumulation at MM, +;; so the operand Rt is required at MM, and operands Ra and Rb are required +;; at EX. 
+;; ADDR_IN +;; If an instruction requires an address as its input operand, the address +;; is required at EX. +;; ST +;; A store instruction requires its data at MM. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at MM. +;; BR +;; If a branch instruction is conditional, its input data is required at EX. + +;; FPU_ADDR_OUT -> FPU_ADDR_IN +;; Main pipeline rules don't need this because those default latency is 1. +(define_bypass 1 + "nds_n10_fpu_load, nds_n10_fpu_store" + "nds_n10_fpu_load, nds_n10_fpu_store" + "nds32_n10_ex_to_ex_p" +) + +;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU, +;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb +(define_bypass 2 + "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\ + nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\ + nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext" + "nds_n10_alu, nds_n10_alu_shift,\ + nds_n10_pbsad, nds_n10_pbsada,\ + nds_n10_mul, nds_n10_mac, nds_n10_div,\ + nds_n10_branch,\ + nds_n10_load, nds_n10_store,\ + nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ + nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ + nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ + nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ + nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ + nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ + nds_n10_mmu,\ + nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ + nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ + nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ + nds_n10_dsp_wext, nds_n10_dsp_bpick" + "nds32_n10_mm_to_ex_p" +) + +;; LMW(N, N) +;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, 
ADDR_IN, BR, MMU +;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb +(define_bypass 2 + "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ + nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ + nds_n10_load_multiple_7, nds_n10_load_multiple_N" + "nds_n10_alu, nds_n10_alu_shift,\ + nds_n10_pbsad, nds_n10_pbsada,\ + nds_n10_mul, nds_n10_mac, nds_n10_div,\ + nds_n10_branch,\ + nds_n10_load, nds_n10_store,\ + nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\ + nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\ + nds_n10_load_multiple_7, nds_n10_load_multiple_N,\ + nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\ + nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\ + nds_n10_store_multiple_7, nds_n10_store_multiple_N,\ + nds_n10_mmu,\ + nds_n10_dsp_alu, nds_n10_dsp_alu_round,\ + nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\ + nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\ + nds_n10_dsp_wext, nds_n10_dsp_bpick" + "nds32_n10_last_load_to_ex_p" +) diff --git a/gcc/config/nds32/nds32-n13.md b/gcc/config/nds32/nds32-n13.md new file mode 100644 index 0000000..ca7546b --- /dev/null +++ b/gcc/config/nds32/nds32-n13.md @@ -0,0 +1,401 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------------------ +;; Define N13 pipeline settings. +;; ------------------------------------------------------------------------ + +(define_automaton "nds32_n13_machine") + +;; ------------------------------------------------------------------------ +;; Pipeline Stages +;; ------------------------------------------------------------------------ +;; F1 - Instruction Fetch First +;; Instruction Tag/Data Arrays +;; ITLB Address Translation +;; Branch Target Buffer Prediction +;; F2 - Instruction Fetch Second +;; Instruction Cache Hit Detection +;; Cache Way Selection +;; Instruction Alignment +;; I1 - Instruction Issue First / Instruction Decode +;; Instruction Cache Replay Triggering +;; 32/16-Bit Instruction Decode +;; Return Address Stack Prediction +;; I2 - Instruction Issue Second / Register File Access +;; Instruction Issue Logic +;; Register File Access +;; E1 - Instruction Execute First / Address Generation / MAC First +;; Data Access Address generation +;; Multiply Operation +;; E2 - Instruction Execute Second / Data Access First / MAC Second / +;; ALU Execute +;; Skewed ALU +;; Branch/Jump/Return Resolution +;; Data Tag/Data arrays +;; DTLB address translation +;; Accumulation Operation +;; E3 - Instruction Execute Third / Data Access Second +;; Data Cache Hit Detection +;; Cache Way Selection +;; Data Alignment +;; E4 - Instruction Execute Fourth / Write Back +;; Interruption Resolution +;; Instruction Retire +;; Register File Write Back + +(define_cpu_unit "n13_i1" "nds32_n13_machine") +(define_cpu_unit "n13_i2" "nds32_n13_machine") +(define_cpu_unit "n13_e1" "nds32_n13_machine") +(define_cpu_unit "n13_e2" "nds32_n13_machine") +(define_cpu_unit "n13_e3" "nds32_n13_machine") +(define_cpu_unit "n13_e4" 
"nds32_n13_machine") + +(define_insn_reservation "nds_n13_unknown" 1 + (and (eq_attr "type" "unknown") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_misc" 1 + (and (eq_attr "type" "misc") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_mmu" 1 + (and (eq_attr "type" "mmu") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_alu" 1 + (and (eq_attr "type" "alu") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_alu_shift" 1 + (and (eq_attr "type" "alu_shift") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_pbsad" 1 + (and (eq_attr "type" "pbsad") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_pbsada" 1 + (and (eq_attr "type" "pbsada") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load" 1 + (and (match_test "nds32::load_single_p (insn)") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_store" 1 + (and (match_test "nds32::store_single_p (insn)") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_1" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_2" 1 + (and (ior (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::load_double_p (insn)")) + 
(eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_3" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_4" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_5" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_6" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_7" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, 
n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_8" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_load_multiple_12" 1 + (and (and (eq_attr "type" "load_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_1" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "1")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_2" 1 + (and (ior (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "2")) + (match_test "nds32::store_double_p (insn)")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_3" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "3")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_4" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "4")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, 
n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_5" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "5")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_6" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "6")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_7" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "7")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_8" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "8")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +(define_insn_reservation "nds_n13_store_multiple_12" 1 + (and (and (eq_attr "type" "store_multiple") + (eq_attr "combo" "12")) + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, 
n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4") + +;; The multiplier at E1 takes two cycles. +(define_insn_reservation "nds_n13_mul" 1 + (and (eq_attr "type" "mul") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_mac" 1 + (and (eq_attr "type" "mac") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4") + +;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2, +;; so the worst case is 34. +(define_insn_reservation "nds_n13_div" 1 + (and (eq_attr "type" "div") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4") + +(define_insn_reservation "nds_n13_branch" 1 + (and (eq_attr "type" "branch") + (eq_attr "pipeline_model" "n13")) + "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4") + +;; ------------------------------------------------------------------------ +;; Comment Notations and Bypass Rules +;; ------------------------------------------------------------------------ +;; Producers (LHS) +;; LD +;; Load data from the memory and produce the loaded data. The result is +;; ready at E3. +;; LMW(N, M) +;; There are N micro-operations within an instruction that loads multiple +;; words. The result produced by the M-th micro-operation is sent to +;; consumers. The result is ready at E3. +;; ADDR_OUT +;; Most load/store instructions can produce an address output if updating +;; the base register is required. The result is ready at E2, which is +;; produced by ALU. +;; ALU, ALU_SHIFT, SIMD +;; Compute data in ALU and produce the data. The result is ready at E2. +;; MUL, MAC +;; Compute data in the multiply-adder and produce the data. The result +;; is ready at E2. +;; DIV +;; Compute data in the divider and produce the data. 
The result is ready +;; at E2. +;; BR +;; Branch-with-link instructions produces a result containing the return +;; address. The result is ready at E2. +;; +;; Consumers (RHS) +;; ALU +;; General ALU instructions require operands at E2. +;; ALU_E1 +;; Some special ALU instructions, such as BSE, BSP and MOVD44, require +;; operand at E1. +;; MUL, DIV, PBSAD, MMU +;; Operands are required at E1. +;; PBSADA_Rt, PBSADA_RaRb +;; Operands Ra and Rb are required at E1, and the operand Rt is required +;; at E2. +;; ALU_SHIFT_Rb +;; An ALU-SHIFT instruction consists of a shift micro-operation followed +;; by an arithmetic micro-operation. The operand Rb is used by the first +;; micro-operation, and there are some latencies if data dependency occurs. +;; MAC_RaRb +;; A MAC instruction does multiplication at E1 and does accumulation at E2, +;; so the operand Rt is required at E2, and operands Ra and Rb are required +;; at E1. +;; ADDR_IN +;; If an instruction requires an address as its input operand, the address +;; is required at E1. +;; ST +;; A store instruction requires its data at E2. +;; SMW(N, M) +;; There are N micro-operations within an instruction that stores multiple +;; words. Each M-th micro-operation requires its data at E2. +;; BR +;; If a branch instruction is conditional, its input data is required at E2. 
+ +;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 3 + "nds_n13_load" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_load_to_e1_p" +) + +;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) +(define_bypass 2 + "nds_n13_load" + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_load_to_e2_p" +) + +;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 3 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + 
nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_load_to_e1_p") + +;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1) +(define_bypass 2 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_load_to_e2_p" +) + +;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 2 + "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_last_two_load_to_e1_p") + +;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT +;; -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN +(define_bypass 2 + 
"nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\ + nds_n13_mul, nds_n13_mac, nds_n13_div,\ + nds_n13_mmu,\ + nds_n13_load, nds_n13_store,\ + nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\ + nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\ + nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\ + nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\ + nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\ + nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12" + "nds32_n13_e2_to_e1_p") diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h index 5d7e165..8d76196 100644 --- a/gcc/config/nds32/nds32-opts.h +++ b/gcc/config/nds32/nds32-opts.h @@ -29,6 +29,7 @@ enum nds32_arch_type { ARCH_V2, ARCH_V3, + ARCH_V3J, ARCH_V3M, ARCH_V3F, ARCH_V3S @@ -42,6 +43,10 @@ enum nds32_cpu_type CPU_N8, CPU_E8, CPU_N9, + CPU_N10, + CPU_GRAYWOLF, + CPU_N12, + CPU_N13, CPU_SIMPLE }; @@ -53,6 +58,13 @@ enum nds32_cmodel_type CMODEL_LARGE }; +/* The code model defines the address generation strategy. */ +enum nds32_ict_model_type +{ + ICT_MODEL_SMALL, + ICT_MODEL_LARGE +}; + /* Multiply instruction configuration. 
*/ enum nds32_mul_type { diff --git a/gcc/config/nds32/nds32-peephole2.md b/gcc/config/nds32/nds32-peephole2.md index a5e77b1..033f62b 100644 --- a/gcc/config/nds32/nds32-peephole2.md +++ b/gcc/config/nds32/nds32-peephole2.md @@ -22,3 +22,139 @@ ;; Use define_peephole2 to handle possible target-specific optimization. ;; ------------------------------------------------------------------------ +;; Try to utilize 16-bit instruction by swap operand if possible. +;; ------------------------------------------------------------------------ + +;; Try to make add as add45. +(define_peephole2 + [(set (match_operand:QIHISI 0 "register_operand" "") + (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "") + (match_operand:QIHISI 2 "register_operand" "")))] + "reload_completed + && TARGET_16_BIT + && REGNO (operands[0]) == REGNO (operands[2]) + && REGNO (operands[0]) != REGNO (operands[1]) + && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))" + [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))]) + +;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33. 
+(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "nds32_have_33_inst_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")]))] + "reload_completed + && TARGET_16_BIT + && REGNO (operands[0]) == REGNO (operands[3]) + && REGNO (operands[0]) != REGNO (operands[2]) + && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0])) + && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))]) + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" ""))] + "TARGET_16_BIT + && !TARGET_ISA_V2 + && NDS32_IS_GPR_REGNUM (REGNO (operands[0])) + && NDS32_IS_GPR_REGNUM (REGNO (operands[1])) + && ((REGNO (operands[0]) & 0x1) == 0) + && ((REGNO (operands[1]) & 0x1) == 0) + && (REGNO (operands[0]) + 1) == REGNO (operands[2]) + && (REGNO (operands[1]) + 1) == REGNO (operands[3])" + "movd44\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "2")]) + +;; Merge two fcpyss to fcpysd. 
+(define_peephole2 + [(set (match_operand:SF 0 "float_even_register_operand" "") + (match_operand:SF 1 "float_even_register_operand" "")) + (set (match_operand:SF 2 "float_odd_register_operand" "") + (match_operand:SF 3 "float_odd_register_operand" ""))] + "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && REGNO (operands[0]) == REGNO (operands[2]) - 1 + && REGNO (operands[1]) == REGNO (operands[3]) - 1" + [(set (match_dup 4) (match_dup 5))] + { + operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0])); + operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1])); + }) + +(define_peephole2 + [(set (match_operand:SF 0 "float_odd_register_operand" "") + (match_operand:SF 1 "float_odd_register_operand" "")) + (set (match_operand:SF 2 "float_even_register_operand" "") + (match_operand:SF 3 "float_even_register_operand" ""))] + "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && REGNO (operands[2]) == REGNO (operands[0]) - 1 + && REGNO (operands[3]) == REGNO (operands[1]) - 1" + [(set (match_dup 4) (match_dup 5))] + { + operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2])); + operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3])); + }) + +;; ------------------------------------------------------------------------ +;; GCC will prefer [u]divmodsi3 rather than [u]divsi3 even remainder is +;; unused, so we use split to drop mod operation for lower register pressure. 
+ +(define_split + [(set (match_operand:SI 0 "register_operand") + (div:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "register_operand"))) + (set (match_operand:SI 3 "register_operand") + (mod:SI (match_dup 1) (match_dup 2)))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL + && can_create_pseudo_p ()" + [(set (match_dup 0) + (div:SI (match_dup 1) + (match_dup 2)))]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (udiv:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "register_operand"))) + (set (match_operand:SI 3 "register_operand") + (umod:SI (match_dup 1) (match_dup 2)))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL + && can_create_pseudo_p ()" + [(set (match_dup 0) + (udiv:SI (match_dup 1) + (match_dup 2)))]) + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand")) + (sign_extend:DI (match_operand:SI 2 "register_operand"))))] + "NDS32_EXT_DSP_P () + && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? 
REGNO (operands[0]) + 1 : REGNO (operands[0]))" + [(const_int 1)] +{ + rtx highpart = nds32_di_high_part_subreg (operands[0]); + emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2])); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "nds32_general_register_operand" "") + (match_operand:DI 1 "nds32_general_register_operand" ""))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL + || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL" + [(set (match_dup 0) (match_dup 1))] +{ + rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); + HOST_WIDE_INT offset; + if (dead_note == NULL_RTX) + offset = 0; + else + offset = 4; + operands[0] = simplify_gen_subreg ( + SImode, operands[0], + DImode, offset); + operands[1] = simplify_gen_subreg ( + SImode, operands[1], + DImode, offset); +}) diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c index a983238..53619d2 100644 --- a/gcc/config/nds32/nds32-pipelines-auxiliary.c +++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c @@ -306,6 +306,19 @@ pbsada_insn_ra_rb_dep_reg_p (rtx pbsada_insn, rtx def_reg) return false; } +/* Determine if the latency is occured when the consumer PBSADA_INSN uses the + value of DEF_REG in its Rt field. */ +bool +pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg) +{ + rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn)); + + if (rtx_equal_p (def_reg, pbsada_rt)) + return true; + + return false; +} + /* Check if INSN is a movd44 insn consuming DEF_REG. */ bool movd44_even_dep_p (rtx_insn *insn, rtx def_reg) @@ -335,6 +348,103 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg) return false; } +/* Check if INSN is a wext insn consuming DEF_REG. 
*/ +bool +wext_odd_dep_p (rtx insn, rtx def_reg) +{ + rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0); + rtx use_reg = XEXP (shift_rtx, 0); + rtx pos_rtx = XEXP (shift_rtx, 1); + + if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx)) + return true; + + if (GET_MODE (def_reg) == DImode) + return reg_overlap_p (def_reg, use_reg); + + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + gcc_assert (REG_P (use_reg)); + + if (REG_P (def_reg)) + { + if (!TARGET_BIG_ENDIAN) + return REGNO (def_reg) == REGNO (use_reg) + 1; + else + return REGNO (def_reg) == REGNO (use_reg); + } + + if (GET_CODE (def_reg) == SUBREG) + { + if (!reg_overlap_p (def_reg, use_reg)) + return false; + + if (!TARGET_BIG_ENDIAN) + return SUBREG_BYTE (def_reg) == 4; + else + return SUBREG_BYTE (def_reg) == 0; + } + + return false; +} + +/* Check if INSN is a bpick insn consuming DEF_REG. */ +bool +bpick_ra_rb_dep_p (rtx insn, rtx def_reg) +{ + rtx ior_rtx = SET_SRC (PATTERN (insn)); + rtx and1_rtx = XEXP (ior_rtx, 0); + rtx and2_rtx = XEXP (ior_rtx, 1); + rtx reg1_0 = XEXP (and1_rtx, 0); + rtx reg1_1 = XEXP (and1_rtx, 1); + rtx reg2_0 = XEXP (and2_rtx, 0); + rtx reg2_1 = XEXP (and2_rtx, 1); + + if (GET_CODE (reg1_0) == NOT) + { + if (rtx_equal_p (reg1_0, reg2_0)) + return reg_overlap_p (def_reg, reg1_1) + || reg_overlap_p (def_reg, reg2_1); + + if (rtx_equal_p (reg1_0, reg2_1)) + return reg_overlap_p (def_reg, reg1_1) + || reg_overlap_p (def_reg, reg2_0); + } + + if (GET_CODE (reg1_1) == NOT) + { + if (rtx_equal_p (reg1_1, reg2_0)) + return reg_overlap_p (def_reg, reg1_0) + || reg_overlap_p (def_reg, reg2_1); + + if (rtx_equal_p (reg1_1, reg2_1)) + return reg_overlap_p (def_reg, reg1_0) + || reg_overlap_p (def_reg, reg2_0); + } + + if (GET_CODE (reg2_0) == NOT) + { + if (rtx_equal_p (reg2_0, reg1_0)) + return reg_overlap_p (def_reg, reg2_1) + || reg_overlap_p (def_reg, reg1_1); + + if (rtx_equal_p (reg2_0, reg1_1)) + return reg_overlap_p (def_reg, reg2_1) + || reg_overlap_p 
(def_reg, reg1_0); + } + + if (GET_CODE (reg2_1) == NOT) + { + if (rtx_equal_p (reg2_1, reg1_0)) + return reg_overlap_p (def_reg, reg2_0) + || reg_overlap_p (def_reg, reg1_1); + + if (rtx_equal_p (reg2_1, reg1_1)) + return reg_overlap_p (def_reg, reg2_0) + || reg_overlap_p (def_reg, reg1_0); + } + + gcc_unreachable (); +} } // namespace scheduling } // namespace nds32 @@ -375,8 +485,7 @@ n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg) operations in order to write two registers. We have to check the dependency from the producer to the first micro-operation. */ case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -506,8 +615,7 @@ n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg) operations in order to write two registers. We have to check the dependency from the producer to the first micro-operation. */ case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -606,8 +714,7 @@ n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) break; case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -706,8 +813,7 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) We have to check the dependency from the producer to the first micro-operation. 
*/ case TYPE_DIV: - if (INSN_CODE (consumer) == CODE_FOR_divmodsi4 - || INSN_CODE (consumer) == CODE_FOR_udivmodsi4) + if (divmod_p (consumer)) use_rtx = SET_SRC (parallel_element (consumer, 0)); else use_rtx = SET_SRC (PATTERN (consumer)); @@ -744,7 +850,316 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) return false; } +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at EX. */ +bool +n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_PBSAD: + case TYPE_MUL: + case TYPE_DALU: + case TYPE_DALU64: + case TYPE_DMUL: + case TYPE_DPACK: + case TYPE_DINSB: + case TYPE_DCMP: + case TYPE_DCLIP: + case TYPE_DALUROUND: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + case TYPE_DMAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. 
*/ + case TYPE_DIV: + if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_DWEXT: + return wext_odd_dep_p (consumer, def_reg); + + case TYPE_DBPICK: + return bpick_ra_rb_dep_p (consumer, def_reg); + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at EX. */ +bool +gw_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_PBSAD: + case TYPE_MUL: + case TYPE_DALU: + case TYPE_DALU64: + case TYPE_DMUL: + case TYPE_DPACK: + case TYPE_DINSB: + case TYPE_DCMP: + case TYPE_DCLIP: + case TYPE_DALUROUND: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_MAC: + case TYPE_DMAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to check the + dependency from the producer to the first micro-operation. 
*/ + case TYPE_DIV: + if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_DWEXT: + return wext_odd_dep_p (consumer, def_reg); + + case TYPE_DBPICK: + return bpick_ra_rb_dep_p (consumer, def_reg); + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + case TYPE_BRANCH: + use_rtx = PATTERN (consumer); + break; + + default: + gcc_unreachable (); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +/* Check dependencies from any stages to ALU_E1 (E1). This is a helper + function of n13_consumed_by_e1_dep_p (). */ +bool +n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg) +{ + rtx unspec_rtx, operand_ra, operand_rb; + rtx src_rtx, dst_rtx; + + switch (INSN_CODE (alu_e1_insn)) + { + /* BSP and BSE are supported by built-in functions, the corresponding + patterns are formed by UNSPEC RTXs. We have to handle them + individually. */ + case CODE_FOR_unspec_bsp: + case CODE_FOR_unspec_bse: + unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0)); + gcc_assert (GET_CODE (unspec_rtx) == UNSPEC); + + operand_ra = XVECEXP (unspec_rtx, 0, 0); + operand_rb = XVECEXP (unspec_rtx, 0, 1); + + if (rtx_equal_p (def_reg, operand_ra) + || rtx_equal_p (def_reg, operand_rb)) + return true; + + return false; + + /* Unlike general ALU instructions, MOVD44 requires operands at E1. 
*/ + case CODE_FOR_move_di: + case CODE_FOR_move_df: + src_rtx = SET_SRC (PATTERN (alu_e1_insn)); + dst_rtx = SET_DEST (PATTERN (alu_e1_insn)); + + if (REG_P (dst_rtx) && REG_P (src_rtx) + && rtx_equal_p (src_rtx, def_reg)) + return true; + + return false; + + default: + return false; + } +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at E1. Because the address generation unit is + at E1, the address input should be ready at E1. Note that the branch + target is also a kind of address, so we have to check it. */ +bool +n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + /* ALU_E1 */ + case TYPE_ALU: + return n13_alu_e1_insn_dep_reg_p (consumer, def_reg); + + case TYPE_PBSADA: + return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg); + + case TYPE_PBSAD: + case TYPE_MUL: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MAC: + use_rtx = extract_mac_non_acc_rtx (consumer); + break; + + case TYPE_DIV: + if (divmod_p (consumer)) + use_rtx = SET_SRC (parallel_element (consumer, 0)); + else + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_MMU: + if (GET_CODE (PATTERN (consumer)) == SET) + use_rtx = SET_SRC (PATTERN (consumer)); + else + return true; + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_target_rtx (consumer); + break; + + case TYPE_LOAD: + case TYPE_STORE: + use_rtx = extract_mem_rtx (consumer); + break; + + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + use_rtx = extract_base_reg (consumer); + break; + + default: + return false; + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + + return false; +} + +/* Check the dependency between the producer defining DEF_REG and CONSUMER + requiring input operand at E2. 
*/ +bool +n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg) +{ + rtx use_rtx; + + switch (get_attr_type (consumer)) + { + case TYPE_ALU: + case TYPE_STORE: + use_rtx = SET_SRC (PATTERN (consumer)); + break; + + case TYPE_ALU_SHIFT: + use_rtx = extract_shift_reg (consumer); + break; + + case TYPE_PBSADA: + return pbsada_insn_rt_dep_reg_p (consumer, def_reg); + + case TYPE_STORE_MULTIPLE: + use_rtx = extract_nth_access_rtx (consumer, 0); + break; + + case TYPE_BRANCH: + use_rtx = extract_branch_condition_rtx (consumer); + break; + + default: + gcc_unreachable(); + } + + if (reg_overlap_p (def_reg, use_rtx)) + return true; + return false; +} } // anonymous namespace /* ------------------------------------------------------------------------ */ @@ -837,8 +1252,7 @@ nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) break; case TYPE_DIV: - if (INSN_CODE (producer) == CODE_FOR_divmodsi4 - || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + if (divmod_p (producer)) def_reg = SET_DEST (parallel_element (producer, 1)); else def_reg = SET_DEST (PATTERN (producer)); @@ -969,8 +1383,7 @@ nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer) break; case TYPE_DIV: - if (INSN_CODE (producer) == CODE_FOR_divmodsi4 - || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + if (divmod_p (producer)) { rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); @@ -1073,8 +1486,7 @@ nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) results, the quotient and the remainder. We have to handle them individually. 
*/ case TYPE_DIV: - if (INSN_CODE (producer) == CODE_FOR_divmodsi4 - || INSN_CODE (producer) == CODE_FOR_udivmodsi4) + if (divmod_p (producer)) { rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); @@ -1132,4 +1544,245 @@ nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg); } +/* Guard functions for N10 cores. */ + +/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ +bool +nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + gcc_assert (get_attr_type (producer) == TYPE_FLOAD + || get_attr_type (producer) == TYPE_FSTORE); + gcc_assert (get_attr_type (consumer) == TYPE_FLOAD + || get_attr_type (consumer) == TYPE_FSTORE); + + if (!post_update_insn_p (producer)) + return false; + + return reg_overlap_p (extract_base_reg (producer), + extract_mem_rtx (consumer)); +} + +/* Check dependencies from MM to EX. */ +bool +nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_MUL: + case TYPE_MAC: + case TYPE_DALU64: + case TYPE_DMUL: + case TYPE_DMAC: + case TYPE_DALUROUND: + case TYPE_DBPICK: + case TYPE_DWEXT: + def_reg = SET_DEST (PATTERN (producer)); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to handle them + individually. */ + case TYPE_DIV: + if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n10_consumed_by_ex_dep_p (consumer, def_reg1) + || n10_consumed_by_ex_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n10_consumed_by_ex_dep_p (consumer, def_reg); +} + +/* Check dependencies from LMW(N, N) to EX. 
*/ +bool +nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n10_consumed_by_ex_dep_p (consumer, last_def_reg); +} + +/* Guard functions for Graywolf cores. */ + +/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */ +bool +nds32_gw_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + return nds32_n10_ex_to_ex_p (producer, consumer); +} + +/* Check dependencies from MM to EX. */ +bool +nds32_gw_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + case TYPE_LOAD: + case TYPE_MUL: + case TYPE_MAC: + case TYPE_DALU64: + case TYPE_DMUL: + case TYPE_DMAC: + case TYPE_DALUROUND: + case TYPE_DBPICK: + case TYPE_DWEXT: + def_reg = SET_DEST (PATTERN (producer)); + break; + + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to handle them + individually. */ + case TYPE_DIV: + if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (gw_consumed_by_ex_dep_p (consumer, def_reg1) + || gw_consumed_by_ex_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return gw_consumed_by_ex_dep_p (consumer, def_reg); +} + +/* Check dependencies from LMW(N, N) to EX. */ +bool +nds32_gw_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return gw_consumed_by_ex_dep_p (consumer, last_def_reg); +} + +/* Guard functions for N12/N13 cores. */ + +/* Check dependencies from E2 to E1. */ +bool +nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg; + + switch (get_attr_type (producer)) + { + /* Only post-update load/store instructions are considered. 
These + instructions produces address output at E2. */ + case TYPE_LOAD: + case TYPE_STORE: + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + if (!post_update_insn_p (producer)) + return false; + + def_reg = extract_base_reg (producer); + break; + + case TYPE_ALU: + case TYPE_ALU_SHIFT: + case TYPE_PBSAD: + case TYPE_PBSADA: + case TYPE_MUL: + case TYPE_MAC: + def_reg = SET_DEST (PATTERN (producer)); + break; + + case TYPE_BRANCH: + return true; + + case TYPE_DIV: + /* Some special instructions, divmodsi4 and udivmodsi4, produce two + results, the quotient and the remainder. We have to handle them + individually. */ + if (divmod_p (producer)) + { + rtx def_reg1 = SET_DEST (parallel_element (producer, 0)); + rtx def_reg2 = SET_DEST (parallel_element (producer, 1)); + + return (n13_consumed_by_e1_dep_p (consumer, def_reg1) + || n13_consumed_by_e1_dep_p (consumer, def_reg2)); + } + + def_reg = SET_DEST (PATTERN (producer)); + break; + + default: + gcc_unreachable (); + } + + return n13_consumed_by_e1_dep_p (consumer, def_reg); +} + +/* Check dependencies from Load-Store Unit (E3) to E1. */ +bool +nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + gcc_assert (get_attr_type (producer) == TYPE_LOAD); + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + + return n13_consumed_by_e1_dep_p (consumer, def_reg); +} + +/* Check dependencies from Load-Store Unit (E3) to E2. */ +bool +nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx def_reg = SET_DEST (PATTERN (producer)); + + gcc_assert (get_attr_type (producer) == TYPE_LOAD); + gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG); + + return n13_consumed_by_e2_dep_p (consumer, def_reg); +} + +/* Check dependencies from LMW(N, N) to E1. 
*/ +bool +nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n13_consumed_by_e1_dep_p (consumer, last_def_reg); +} + +/* Check dependencies from LMW(N, N) to E2. */ +bool +nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_def_reg = extract_nth_access_reg (producer, -1); + + return n13_consumed_by_e2_dep_p (consumer, last_def_reg); +} + +/* Check dependencies from LMW(N, N-1) to E1. */ +bool +nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer) +{ + rtx last_two_def_reg = extract_nth_access_reg (producer, -2); + + if (last_two_def_reg == NULL_RTX) + return false; + + return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg); +} /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c index 5e01430..b41b6c7 100644 --- a/gcc/config/nds32/nds32-predicates.c +++ b/gcc/config/nds32/nds32-predicates.c @@ -356,54 +356,57 @@ nds32_valid_stack_push_pop_p (rtx op, bool push_p) } /* Function to check if 'bclr' instruction can be used with IVAL. */ -int -nds32_can_use_bclr_p (int ival) +bool +nds32_can_use_bclr_p (HOST_WIDE_INT ival) { int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); /* Calculate the number of 1-bit of (~ival), if there is only one 1-bit, it means the original ival has only one 0-bit, So it is ok to perform 'bclr' operation. */ - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival)); + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival) & mask); /* 'bclr' is a performance extension instruction. */ return (TARGET_EXT_PERF && (one_bit_count == 1)); } /* Function to check if 'bset' instruction can be used with IVAL. */ -int -nds32_can_use_bset_p (int ival) +bool +nds32_can_use_bset_p (HOST_WIDE_INT ival) { int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); /* Caculate the number of 1-bit of ival, if there is only one 1-bit, it is ok to perform 'bset' operation. */ - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask); /* 'bset' is a performance extension instruction. */ return (TARGET_EXT_PERF && (one_bit_count == 1)); } /* Function to check if 'btgl' instruction can be used with IVAL. 
*/ -int -nds32_can_use_bset_p (int ival) +bool +nds32_can_use_bset_p (HOST_WIDE_INT ival) { int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); /* Caculate the number of 1-bit of ival, if there is only one 1-bit, it is ok to perform 'bset' operation. */ - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask); /* 'bset' is a performance extension instruction. */ return (TARGET_EXT_PERF && (one_bit_count == 1)); } /* Function to check if 'btgl' instruction can be used with IVAL. */ -int -nds32_can_use_btgl_p (int ival) +bool +nds32_can_use_btgl_p (HOST_WIDE_INT ival) { int one_bit_count; + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (SImode); /* Caculate the number of 1-bit of ival, if there is only one 1-bit, it is ok to perform 'btgl' operation. */ - one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival) & mask); /* 'btgl' is a performance extension instruction. */ return (TARGET_EXT_PERF && (one_bit_count == 1)); } /* Function to check if 'bitci' instruction can be used with IVAL. */ -int -nds32_can_use_bitci_p (int ival) +bool +nds32_can_use_bitci_p (HOST_WIDE_INT ival) { /* If we are using V3 ISA, we have 'bitci' instruction. 
Try to see if we can present 'andi' semantic with @@ -515,4 +518,117 @@ nds32_const_double_range_ok_p (rtx op, machine_mode mode, return val >= lower && val < upper; } + +bool +nds32_const_unspec_p (rtx x) +{ + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + x = XEXP (x, 0); + + if (GET_CODE (x) == UNSPEC) + { + switch (XINT (x, 1)) + { + case UNSPEC_GOTINIT: + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + return false; + default: + return true; + } + } + } + + if (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x)) + return false; + + return true; +} + +HOST_WIDE_INT +const_vector_to_hwint (rtx op) +{ + HOST_WIDE_INT hwint = 0; + HOST_WIDE_INT mask; + int i; + int shift_adv; + int shift = 0; + int nelem; + + switch (GET_MODE (op)) + { + case E_V2HImode: + mask = 0xffff; + shift_adv = 16; + nelem = 2; + break; + case E_V4QImode: + mask = 0xff; + shift_adv = 8; + nelem = 4; + break; + default: + gcc_unreachable (); + } + + if (TARGET_BIG_ENDIAN) + { + for (i = 0; i < nelem; ++i) + { + HOST_WIDE_INT val = XINT (XVECEXP (op, 0, nelem - i - 1), 0); + hwint |= (val & mask) << shift; + shift = shift + shift_adv; + } + } + else + { + for (i = 0; i < nelem; ++i) + { + HOST_WIDE_INT val = XINT (XVECEXP (op, 0, i), 0); + hwint |= (val & mask) << shift; + shift = shift + shift_adv; + } + } + + return hwint; +} + +bool +nds32_valid_CVp5_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < ((1 << 5) + 16)) && (ival >= (0 + 16)); +} + +bool +nds32_valid_CVs5_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < (1 << 4)) && (ival >= -(1 << 4)); +} + +bool +nds32_valid_CVs2_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + return (ival < (1 << 19)) && (ival >= -(1 << 19)); +} + +bool +nds32_valid_CVhi_p (rtx op) +{ + HOST_WIDE_INT ival = const_vector_to_hwint (op); + 
return (ival != 0) && ((ival & 0xfff) == 0); +} + /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index e7b7d41..7fb2315 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -69,9 +69,10 @@ extern unsigned int nds32_dbx_register_number (unsigned int); /* ------------------------------------------------------------------------ */ -/* Auxiliary functions for lwm/smw. */ +/* Auxiliary functions for manipulation DI mode. */ -extern bool nds32_valid_smw_lwm_base_p (rtx); +extern rtx nds32_di_high_part_subreg(rtx); +extern rtx nds32_di_low_part_subreg(rtx); /* Auxiliary functions for expanding rtl used in nds32-multiple.md. */ @@ -116,6 +117,20 @@ extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *); + +extern bool nds32_gw_ex_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_gw_mm_to_ex_p (rtx_insn *, rtx_insn *); +extern bool nds32_gw_last_load_to_ex_p (rtx_insn *, rtx_insn *); + +extern bool nds32_n13_e2_to_e1_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_load_to_e1_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_load_to_e2_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_last_load_to_e1_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_last_load_to_e2_p (rtx_insn *, rtx_insn *); +extern bool nds32_n13_last_two_load_to_e1_p (rtx_insn *, rtx_insn *); /* Auxiliary functions for stack operation predicate checking. */ @@ -123,24 +138,25 @@ extern bool nds32_valid_stack_push_pop_p (rtx, bool); /* Auxiliary functions for bit operation detection. 
*/ -extern int nds32_can_use_bclr_p (int); -extern int nds32_can_use_bset_p (int); -extern int nds32_can_use_btgl_p (int); +extern bool nds32_can_use_bclr_p (HOST_WIDE_INT); +extern bool nds32_can_use_bset_p (HOST_WIDE_INT); +extern bool nds32_can_use_btgl_p (HOST_WIDE_INT); -extern int nds32_can_use_bitci_p (int); +extern bool nds32_can_use_bitci_p (HOST_WIDE_INT); extern bool nds32_const_double_range_ok_p (rtx, machine_mode, HOST_WIDE_INT, HOST_WIDE_INT); +extern bool nds32_const_unspec_p (rtx x); + /* Auxiliary function for 'Computing the Length of an Insn'. */ extern int nds32_adjust_insn_length (rtx_insn *, int); /* Auxiliary functions for FP_AS_GP detection. */ -extern int nds32_fp_as_gp_check_available (void); - extern bool nds32_symbol_load_store_p (rtx_insn *); +extern bool nds32_naked_function_p (tree); /* Auxiliary functions for jump table generation. */ @@ -159,10 +175,50 @@ extern void nds32_expand_float_cstore (rtx *); extern enum nds32_expand_result_type nds32_expand_movcc (rtx *); extern void nds32_expand_float_movcc (rtx *); +/* Auxiliary functions for expand extv/insv instruction. */ + +extern enum nds32_expand_result_type nds32_expand_extv (rtx *); +extern enum nds32_expand_result_type nds32_expand_insv (rtx *); + +/* Auxiliary functions for expand PIC instruction. */ + +extern void nds32_expand_pic_move (rtx *); + +/* Auxiliary functions to legitimize PIC address. */ + +extern rtx nds32_legitimize_pic_address (rtx); + +/* Auxiliary functions for expand TLS instruction. */ + +extern void nds32_expand_tls_move (rtx *); + +/* Auxiliary functions to legitimize TLS address. */ + +extern rtx nds32_legitimize_tls_address (rtx); + +/* Auxiliary functions to identify thread-local symbol. */ + +extern bool nds32_tls_referenced_p (rtx); + +/* Auxiliary functions for expand ICT instruction. */ + +extern void nds32_expand_ict_move (rtx *); + +/* Auxiliary functions to legitimize address for indirect-call symbol. 
*/ + +extern rtx nds32_legitimize_ict_address (rtx); + +/* Auxiliary functions to identify indirect-call symbol. */ + +extern bool nds32_indirect_call_referenced_p (rtx); /* Auxiliary functions to identify long-call symbol. */ extern bool nds32_long_call_p (rtx); +/* Auxiliary functions to identify SYMBOL_REF and LABEL_REF pattern. */ + +extern bool symbolic_reference_mentioned_p (rtx); + /* Auxiliary functions to identify conditional move comparison operand. */ extern int nds32_cond_move_p (rtx); @@ -185,6 +241,7 @@ extern const char *nds32_output_32bit_load_s (rtx *, int); extern const char *nds32_output_float_load(rtx *); extern const char *nds32_output_float_store(rtx *); extern const char *nds32_output_smw_single_word (rtx *); +extern const char *nds32_output_smw_double_word (rtx *); extern const char *nds32_output_lmw_single_word (rtx *); extern const char *nds32_output_double (rtx *, bool); extern const char *nds32_output_cbranchsi4_equality_zero (rtx_insn *, rtx *); @@ -193,9 +250,12 @@ extern const char *nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn * rtx *); extern const char *nds32_output_cbranchsi4_greater_less_zero (rtx_insn *, rtx *); +extern const char *nds32_output_unpkd8 (rtx, rtx, rtx, rtx, bool); + extern const char *nds32_output_call (rtx, rtx *, rtx, const char *, const char *, bool); - +extern const char *nds32_output_tls_desc (rtx *); +extern const char *nds32_output_tls_ie (rtx *); /* Auxiliary functions to output stack push/pop instruction. */ @@ -203,9 +263,19 @@ extern const char *nds32_output_stack_push (rtx); extern const char *nds32_output_stack_pop (rtx); extern const char *nds32_output_return (void); + +/* Auxiliary functions to split/output sms pattern. */ +extern bool nds32_need_split_sms_p (rtx, rtx, rtx, rtx); +extern const char *nds32_output_sms (rtx, rtx, rtx, rtx); +extern void nds32_split_sms (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + /* Auxiliary functions to split double word RTX pattern. 
*/ extern void nds32_spilt_doubleword (rtx *, bool); +extern void nds32_split_ashiftdi3 (rtx, rtx, rtx); +extern void nds32_split_ashiftrtdi3 (rtx, rtx, rtx); +extern void nds32_split_lshiftrtdi3 (rtx, rtx, rtx); +extern void nds32_split_rotatertdi3 (rtx, rtx, rtx); /* Auxiliary functions to split large constant RTX pattern. */ @@ -237,15 +307,29 @@ extern void nds32_construct_isr_vectors_information (tree, const char *); extern void nds32_asm_file_start_for_isr (void); extern void nds32_asm_file_end_for_isr (void); extern bool nds32_isr_function_p (tree); +extern bool nds32_isr_function_critical_p (tree); /* Auxiliary functions for cost calculation. */ +extern void nds32_init_rtx_costs (void); extern bool nds32_rtx_costs_impl (rtx, machine_mode, int, int, int *, bool); extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool); /* Auxiliary functions for pre-define marco. */ extern void nds32_cpu_cpp_builtins(struct cpp_reader *); +/* Auxiliary functions for const_vector's constraints. */ + +extern HOST_WIDE_INT const_vector_to_hwint (rtx); +extern bool nds32_valid_CVp5_p (rtx); +extern bool nds32_valid_CVs5_p (rtx); +extern bool nds32_valid_CVs2_p (rtx); +extern bool nds32_valid_CVhi_p (rtx); + +/* Auxiliary functions for lwm/smw. 
*/ + +extern bool nds32_valid_smw_lwm_base_p (rtx); + extern bool nds32_split_double_word_load_store_p (rtx *,bool); namespace nds32 { @@ -258,11 +342,13 @@ bool load_single_p (rtx_insn *); bool store_single_p (rtx_insn *); bool load_double_p (rtx_insn *); bool store_double_p (rtx_insn *); +bool store_offset_reg_p (rtx_insn *); bool post_update_insn_p (rtx_insn *); bool immed_offset_p (rtx); int find_post_update_rtx (rtx_insn *); rtx extract_mem_rtx (rtx_insn *); rtx extract_base_reg (rtx_insn *); +rtx extract_offset_rtx (rtx_insn *); rtx extract_shift_reg (rtx); @@ -271,6 +357,8 @@ rtx extract_movd44_odd_reg (rtx_insn *); rtx extract_mac_non_acc_rtx (rtx_insn *); +bool divmod_p (rtx_insn *); + rtx extract_branch_target_rtx (rtx_insn *); rtx extract_branch_condition_rtx (rtx_insn *); } // namespace nds32 @@ -279,5 +367,6 @@ extern bool nds32_use_load_post_increment(machine_mode); /* Functions for create nds32 specific optimization pass. */ extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); +extern rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *); /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-relax-opt.c b/gcc/config/nds32/nds32-relax-opt.c index 0349be4..e54bd97 100644 --- a/gcc/config/nds32/nds32-relax-opt.c +++ b/gcc/config/nds32/nds32-relax-opt.c @@ -52,6 +52,8 @@ #include "cfgrtl.h" #include "tree-pass.h" +using namespace nds32; + /* This is used to create unique relax hint id value. The initial value is 0. */ static int relax_group_id = 0; @@ -185,6 +187,121 @@ nds32_plus_reg_load_store_p (rtx_insn *insn) return false; } +/* Return true if x is const and the reference is an ict symbol. 
*/ +static bool +nds32_ict_const_p (rtx x) +{ + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + return nds32_indirect_call_referenced_p (x); + } + return FALSE; +} + +/* Group the following pattern as relax candidates: + + GOT: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + lw $rb, [$ra + $gp] + + GOTOFF, TLSLE: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + LS $rb, [$ra + $gp] + + GOTOFF, TLSLE: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + add $rb, $ra, $gp($tp) + + Initial GOT table: + sethi $gp,hi20(sym) + ori $gp, $gp, lo12(sym) + add5.pc $gp */ + +static auto_vec nds32_group_infos; +/* Group the PIC and TLS relax candidate instructions for linker. */ +static bool +nds32_pic_tls_group (rtx_insn *def_insn, + enum nds32_relax_insn_type relax_type, + int sym_type) +{ + df_ref def_record; + df_link *link; + rtx_insn *use_insn = NULL; + rtx pat, new_pat; + def_record = DF_INSN_DEFS (def_insn); + for (link = DF_REF_CHAIN (def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + use_insn = DF_REF_INSN (link->ref); + + /* Skip if define insn and use insn not in the same basic block. */ + if (!dominated_by_p (CDI_DOMINATORS, + BLOCK_FOR_INSN (use_insn), + BLOCK_FOR_INSN (def_insn))) + return FALSE; + + /* Skip if use_insn not active insn. 
*/ + if (!active_insn_p (use_insn)) + return FALSE; + + switch (relax_type) + { + case RELAX_ORI: + + /* GOTOFF, TLSLE: + sethi $ra, hi20(sym) + ori $ra, $ra, lo12(sym) + add $rb, $ra, $gp($tp) */ + if ((sym_type == UNSPEC_TLSLE + || sym_type == UNSPEC_GOTOFF) + && (recog_memoized (use_insn) == CODE_FOR_addsi3)) + { + pat = XEXP (PATTERN (use_insn), 1); + new_pat = + gen_rtx_UNSPEC (SImode, + gen_rtvec (2, XEXP (pat, 0), XEXP (pat, 1)), + UNSPEC_ADD32); + validate_replace_rtx (pat, new_pat, use_insn); + nds32_group_infos.safe_push (use_insn); + } + else if (nds32_plus_reg_load_store_p (use_insn) + && !nds32_sp_base_or_plus_load_store_p (use_insn)) + nds32_group_infos.safe_push (use_insn); + else + return FALSE; + break; + + default: + return FALSE; + } + } + return TRUE; +} + +static int +nds32_pic_tls_symbol_type (rtx x) +{ + x = XEXP (SET_SRC (PATTERN (x)), 1); + + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + x = XEXP (x, 0); + + return XINT (x, 1); + } + + return XINT (x, 1); +} + /* Group the relax candidates with group id. */ static void nds32_group_insns (rtx sethi) @@ -193,6 +310,7 @@ nds32_group_insns (rtx sethi) df_link *link; rtx_insn *use_insn = NULL; rtx group_id; + bool valid; def_record = DF_INSN_DEFS (sethi); @@ -242,6 +360,132 @@ nds32_group_insns (rtx sethi) /* Insert .relax_* directive. */ if (active_insn_p (use_insn)) emit_insn_before (gen_relax_group (group_id), use_insn); + + /* Find ori ra, ra, unspec(symbol) instruction. */ + if (use_insn != NULL + && recog_memoized (use_insn) == CODE_FOR_lo_sum + && !nds32_const_unspec_p (XEXP (SET_SRC (PATTERN (use_insn)), 1))) + { + int sym_type = nds32_pic_tls_symbol_type (use_insn); + valid = nds32_pic_tls_group (use_insn, RELAX_ORI, sym_type); + + /* Insert .relax_* directive. 
*/ + while (!nds32_group_infos.is_empty ()) + { + use_insn = nds32_group_infos.pop (); + if (valid) + emit_insn_before (gen_relax_group (group_id), use_insn); + } + } + } + + relax_group_id++; +} + +/* Convert relax group id in rtl. */ + +static void +nds32_group_tls_insn (rtx insn) +{ + rtx pat = PATTERN (insn); + rtx unspec_relax_group = XEXP (XVECEXP (pat, 0, 1), 0); + + while (GET_CODE (pat) != SET && GET_CODE (pat) == PARALLEL) + { + pat = XVECEXP (pat, 0, 0); + } + + if (GET_CODE (unspec_relax_group) == UNSPEC + && XINT (unspec_relax_group, 1) == UNSPEC_VOLATILE_RELAX_GROUP) + { + XVECEXP (unspec_relax_group, 0, 0) = GEN_INT (relax_group_id); + } + + relax_group_id++; +} + +static bool +nds32_float_reg_load_store_p (rtx_insn *insn) +{ + rtx pat = PATTERN (insn); + + if (get_attr_type (insn) == TYPE_FLOAD + && GET_CODE (pat) == SET + && (GET_MODE (XEXP (pat, 0)) == SFmode + || GET_MODE (XEXP (pat, 0)) == DFmode) + && MEM_P (XEXP (pat, 1))) + { + rtx addr = XEXP (XEXP (pat, 1), 0); + + /* [$ra] */ + if (REG_P (addr)) + return true; + /* [$ra + offset] */ + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1))) + return true; + } + return false; +} + + +/* Group float load-store instructions: + la $ra, symbol + flsi $rt, [$ra + offset] */ + +static void +nds32_group_float_insns (rtx insn) +{ + df_ref def_record, use_record; + df_link *link; + rtx_insn *use_insn = NULL; + rtx group_id; + + def_record = DF_INSN_DEFS (insn); + + for (link = DF_REF_CHAIN (def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + use_insn = DF_REF_INSN (link->ref); + + /* Skip if define insn and use insn not in the same basic block. */ + if (!dominated_by_p (CDI_DOMINATORS, + BLOCK_FOR_INSN (use_insn), + BLOCK_FOR_INSN (insn))) + return; + + /* Skip if the low-part used register is from different high-part + instructions. 
*/ + use_record = DF_INSN_USES (use_insn); + if (DF_REF_CHAIN (use_record) && DF_REF_CHAIN (use_record)->next) + return; + + /* Skip if use_insn not active insn. */ + if (!active_insn_p (use_insn)) + return; + + if (!nds32_float_reg_load_store_p (use_insn) + || find_post_update_rtx (use_insn) != -1) + return; + } + + group_id = GEN_INT (relax_group_id); + /* Insert .relax_* directive for insn. */ + emit_insn_before (gen_relax_group (group_id), insn); + + /* Scan the use insns and insert the directive. */ + for (link = DF_REF_CHAIN (def_record); link; link = link->next) + { + if (!DF_REF_INSN_INFO (link->ref)) + continue; + + use_insn = DF_REF_INSN (link->ref); + + /* Insert .relax_* directive. */ + emit_insn_before (gen_relax_group (group_id), use_insn); } relax_group_id++; @@ -271,8 +515,21 @@ nds32_relax_group (void) /* Find sethi ra, symbol instruction. */ if (recog_memoized (insn) == CODE_FOR_sethi && nds32_symbolic_operand (XEXP (SET_SRC (PATTERN (insn)), 0), - SImode)) + SImode) + && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))) nds32_group_insns (insn); + else if (recog_memoized (insn) == CODE_FOR_tls_ie) + nds32_group_tls_insn (insn); + else if (TARGET_FPU_SINGLE + && recog_memoized (insn) == CODE_FOR_move_addr + && !nds32_ict_const_p (XEXP (SET_SRC (PATTERN (insn)), 0))) + { + nds32_group_float_insns (insn); + } + } + else if (CALL_P (insn) && recog_memoized (insn) == CODE_FOR_tls_desc) + { + nds32_group_tls_insn (insn); } } diff --git a/gcc/config/nds32/nds32-utils.c b/gcc/config/nds32/nds32-utils.c index b0151be..7c93cd2 100644 --- a/gcc/config/nds32/nds32-utils.c +++ b/gcc/config/nds32/nds32-utils.c @@ -142,6 +142,23 @@ store_double_p (rtx_insn *insn) return true; } +bool +store_offset_reg_p (rtx_insn *insn) +{ + if (get_attr_type (insn) != TYPE_STORE) + return false; + + rtx offset_rtx = extract_offset_rtx (insn); + + if (offset_rtx == NULL_RTX) + return false; + + if (REG_P (offset_rtx)) + return true; + + return false; +} + /* Determine if 
INSN is a post update insn. */ bool post_update_insn_p (rtx_insn *insn) @@ -316,22 +333,114 @@ extract_base_reg (rtx_insn *insn) if (REG_P (XEXP (mem_rtx, 0))) return XEXP (mem_rtx, 0); + /* (mem (lo_sum (reg) (symbol_ref)) */ + if (GET_CODE (XEXP (mem_rtx, 0)) == LO_SUM) + return XEXP (XEXP (mem_rtx, 0), 0); + plus_rtx = XEXP (mem_rtx, 0); if (GET_CODE (plus_rtx) == SYMBOL_REF || GET_CODE (plus_rtx) == CONST) return NULL_RTX; - gcc_assert (GET_CODE (plus_rtx) == PLUS - || GET_CODE (plus_rtx) == POST_INC - || GET_CODE (plus_rtx) == POST_DEC - || GET_CODE (plus_rtx) == POST_MODIFY); - gcc_assert (REG_P (XEXP (plus_rtx, 0))); /* (mem (plus (reg) (const_int))) or + (mem (plus (mult (reg) (const_int 4)) (reg))) or (mem (post_inc (reg))) or (mem (post_dec (reg))) or (mem (post_modify (reg) (plus (reg) (reg)))) */ - return XEXP (plus_rtx, 0); + gcc_assert (GET_CODE (plus_rtx) == PLUS + || GET_CODE (plus_rtx) == POST_INC + || GET_CODE (plus_rtx) == POST_DEC + || GET_CODE (plus_rtx) == POST_MODIFY); + + if (REG_P (XEXP (plus_rtx, 0))) + return XEXP (plus_rtx, 0); + + gcc_assert (REG_P (XEXP (plus_rtx, 1))); + return XEXP (plus_rtx, 1); +} + +/* Extract the offset rtx from load/store insns. The function returns + NULL_RTX if offset is absent. */ +rtx +extract_offset_rtx (rtx_insn *insn) +{ + rtx mem_rtx; + rtx plus_rtx; + rtx offset_rtx; + + /* Find the MEM rtx. The multiple load/store insns don't have + the offset field so we can return NULL_RTX here. 
*/ + switch (get_attr_type (insn)) + { + case TYPE_LOAD_MULTIPLE: + case TYPE_STORE_MULTIPLE: + return NULL_RTX; + + case TYPE_LOAD: + case TYPE_FLOAD: + case TYPE_STORE: + case TYPE_FSTORE: + mem_rtx = extract_mem_rtx (insn); + break; + + default: + gcc_unreachable (); + } + + gcc_assert (MEM_P (mem_rtx)); + + /* (mem (reg)) */ + if (REG_P (XEXP (mem_rtx, 0))) + return NULL_RTX; + + plus_rtx = XEXP (mem_rtx, 0); + + switch (GET_CODE (plus_rtx)) + { + case SYMBOL_REF: + case CONST: + case POST_INC: + case POST_DEC: + return NULL_RTX; + + case PLUS: + /* (mem (plus (reg) (const_int))) or + (mem (plus (mult (reg) (const_int 4)) (reg))) */ + if (REG_P (XEXP (plus_rtx, 0))) + offset_rtx = XEXP (plus_rtx, 1); + else + { + gcc_assert (REG_P (XEXP (plus_rtx, 1))); + offset_rtx = XEXP (plus_rtx, 0); + } + + if (ARITHMETIC_P (offset_rtx)) + { + gcc_assert (GET_CODE (offset_rtx) == MULT); + gcc_assert (REG_P (XEXP (offset_rtx, 0))); + offset_rtx = XEXP (offset_rtx, 0); + } + break; + + case LO_SUM: + /* (mem (lo_sum (reg) (symbol_ref)) */ + offset_rtx = XEXP (plus_rtx, 1); + break; + + case POST_MODIFY: + /* (mem (post_modify (reg) (plus (reg) (reg / const_int)))) */ + gcc_assert (REG_P (XEXP (plus_rtx, 0))); + plus_rtx = XEXP (plus_rtx, 1); + gcc_assert (GET_CODE (plus_rtx) == PLUS); + offset_rtx = XEXP (plus_rtx, 0); + break; + + default: + gcc_unreachable (); + } + + return offset_rtx; } /* Extract the register of the shift operand from an ALU_SHIFT rtx. */ @@ -413,6 +522,7 @@ extract_mac_non_acc_rtx (rtx_insn *insn) switch (get_attr_type (insn)) { case TYPE_MAC: + case TYPE_DMAC: if (REG_P (XEXP (exp, 0))) return XEXP (exp, 1); else @@ -423,6 +533,19 @@ extract_mac_non_acc_rtx (rtx_insn *insn) } } +/* Check if the DIV insn needs two write ports. 
*/ +bool +divmod_p (rtx_insn *insn) +{ + gcc_assert (get_attr_type (insn) == TYPE_DIV); + + if (INSN_CODE (insn) == CODE_FOR_divmodsi4 + || INSN_CODE (insn) == CODE_FOR_udivmodsi4) + return true; + + return false; +} + /* Extract the rtx representing the branch target to help recognize data hazards. */ rtx diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index 8994c13..475fc71 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -305,6 +305,7 @@ static const struct attribute_spec nds32_attribute_table[] = { "nested", 0, 0, false, false, false, false, NULL, NULL }, { "not_nested", 0, 0, false, false, false, false, NULL, NULL }, { "nested_ready", 0, 0, false, false, false, false, NULL, NULL }, + { "critical", 0, 0, false, false, false, false, NULL, NULL }, /* The attributes describing isr register save scheme. */ { "save_all", 0, 0, false, false, false, false, NULL, NULL }, @@ -314,9 +315,19 @@ static const struct attribute_spec nds32_attribute_table[] = { "nmi", 1, 1, false, false, false, false, NULL, NULL }, { "warm", 1, 1, false, false, false, false, NULL, NULL }, + /* The attributes describing isr security level. */ + { "secure", 1, 1, false, false, false, false, NULL, NULL }, + /* The attribute telling no prologue/epilogue. */ { "naked", 0, 0, false, false, false, false, NULL, NULL }, + /* The attribute is used to tell this function to be ROM patch. */ + { "indirect_call",0, 0, false, false, false, false, NULL, NULL }, + + /* FOR BACKWARD COMPATIBILITY, + this attribute also tells no prologue/epilogue. */ + { "no_prologue", 0, 0, false, false, false, false, NULL, NULL }, + /* The last attribute spec is set to be NULL. */ { NULL, 0, 0, false, false, false, false, NULL, NULL } }; @@ -345,6 +356,10 @@ nds32_init_machine_status (void) /* Initially this function is not under strictly aligned situation. */ machine->strict_aligned_p = 0; + /* Initially this function has no naked and no_prologue attributes. 
*/ + machine->attr_naked_p = 0; + machine->attr_no_prologue_p = 0; + return machine; } @@ -362,6 +377,15 @@ nds32_compute_stack_frame (void) needs prologue/epilogue. */ cfun->machine->naked_p = 0; + /* We need to mark whether this function has naked and no_prologue + attribute so that we can distinguish the difference if users applies + -mret-in-naked-func option. */ + cfun->machine->attr_naked_p + = lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + ? 1 : 0; + cfun->machine->attr_no_prologue_p + = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) + ? 1 : 0; /* If __builtin_eh_return is used, we better have frame pointer needed so that we can easily locate the stack slot of return address. */ @@ -432,7 +456,8 @@ nds32_compute_stack_frame (void) /* If $gp value is required to be saved on stack, it needs 4 bytes space. Check whether we are using PIC code genration. */ - cfun->machine->gp_size = (flag_pic) ? 4 : 0; + cfun->machine->gp_size = + (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) ? 4 : 0; /* If $lp value is required to be saved on stack, it needs 4 bytes space. Check whether $lp is ever live. */ @@ -497,7 +522,7 @@ nds32_compute_stack_frame (void) } /* Check if this function can omit prologue/epilogue code fragment. - If there is 'naked' attribute in this function, + If there is 'no_prologue'/'naked'/'secure' attribute in this function, we can set 'naked_p' flag to indicate that we do not have to generate prologue/epilogue. Or, if all the following conditions succeed, @@ -510,14 +535,17 @@ nds32_compute_stack_frame (void) is no outgoing size. condition 3: There is no local_size, which means we do not need to adjust $sp. 
*/ - if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + if (lookup_attribute ("no_prologue", DECL_ATTRIBUTES (current_function_decl)) + || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + || lookup_attribute ("secure", DECL_ATTRIBUTES (current_function_decl)) || (cfun->machine->callee_saved_first_gpr_regno == SP_REGNUM && cfun->machine->callee_saved_last_gpr_regno == SP_REGNUM && cfun->machine->callee_saved_first_fpr_regno == SP_REGNUM && cfun->machine->callee_saved_last_fpr_regno == SP_REGNUM && !df_regs_ever_live_p (FP_REGNUM) && !df_regs_ever_live_p (LP_REGNUM) - && cfun->machine->local_size == 0)) + && cfun->machine->local_size == 0 + && !flag_pic)) { /* Set this function 'naked_p' and other functions can check this flag. Note that in nds32 port, the 'naked_p = 1' JUST means there is no @@ -1259,6 +1287,32 @@ nds32_emit_stack_v3pop (unsigned Rb, REG_NOTES (parallel_insn) = dwarf; } +static void +nds32_emit_load_gp (void) +{ + rtx got_symbol, pat; + + /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ + emit_insn (gen_blockage ()); + + got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + /* sethi $gp, _GLOBAL_OFFSET_TABLE_ -8 */ + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); + pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-8))); + emit_insn (gen_sethi (pic_offset_table_rtx,pat)); + + /* ori $gp, $gp, _GLOBAL_OFFSET_TABLE_ -4 */ + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, got_symbol), UNSPEC_GOTINIT); + pat = gen_rtx_CONST (SImode, gen_rtx_PLUS (Pmode, pat, GEN_INT (-4))); + emit_insn (gen_lo_sum (pic_offset_table_rtx, pic_offset_table_rtx, pat)); + + /* add5.pc $gp */ + emit_insn (gen_add_pc (pic_offset_table_rtx, pic_offset_table_rtx)); + + /* Initial GLOBAL OFFSET TABLE don't do the scheduling. */ + emit_insn (gen_blockage ()); +} + /* Function that may creates more instructions for large value on adjusting stack pointer. 
@@ -1342,17 +1396,25 @@ nds32_needs_double_word_align (machine_mode mode, const_tree type) } /* Return true if FUNC is a naked function. */ -static bool +bool nds32_naked_function_p (tree func) { - tree t; + /* FOR BACKWARD COMPATIBILITY, + we need to support 'no_prologue' attribute as well. */ + tree t_naked; + tree t_no_prologue; if (TREE_CODE (func) != FUNCTION_DECL) abort (); - t = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); + /* We have to use lookup_attribute() to check attributes. + Because attr_naked_p and attr_no_prologue_p are set in + nds32_compute_stack_frame() and the function has not been + invoked yet. */ + t_naked = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); + t_no_prologue = lookup_attribute ("no_prologue", DECL_ATTRIBUTES (func)); - return (t != NULL_TREE); + return ((t_naked != NULL_TREE) || (t_no_prologue != NULL_TREE)); } /* Function that determine whether a load postincrement is a good thing to use @@ -1570,6 +1632,11 @@ static void nds32_register_passes (void) { nds32_register_pass ( + make_pass_nds32_fp_as_gp, + PASS_POS_INSERT_BEFORE, + "ira"); + + nds32_register_pass ( make_pass_nds32_relax_opt, PASS_POS_INSERT_AFTER, "mach"); @@ -1636,6 +1703,9 @@ nds32_conditional_register_usage (void) { int regno; + if (TARGET_LINUX_ABI) + fixed_regs[TP_REGNUM] = 1; + if (TARGET_HARD_FLOAT) { for (regno = NDS32_FIRST_FPR_REGNUM; @@ -1987,6 +2057,16 @@ nds32_function_arg_boundary (machine_mode mode, const_tree type) : PARM_BOUNDARY); } +bool +nds32_vector_mode_supported_p (machine_mode mode) +{ + if (mode == V4QImode + || mode == V2HImode) + return NDS32_EXT_DSP_P (); + + return false; +} + /* -- How Scalar Function Values Are Returned. */ static rtx @@ -2124,56 +2204,12 @@ static void nds32_asm_function_end_prologue (FILE *file) { fprintf (file, "\t! END PROLOGUE\n"); - - /* If frame pointer is NOT needed and -mfp-as-gp is issued, - we can generate special directive: ".omit_fp_begin" - to guide linker doing fp-as-gp optimization. 
- However, for a naked function, which means - it should not have prologue/epilogue, - using fp-as-gp still requires saving $fp by push/pop behavior and - there is no benefit to use fp-as-gp on such small function. - So we need to make sure this function is NOT naked as well. */ - if (!frame_pointer_needed - && !cfun->machine->naked_p - && cfun->machine->fp_as_gp_p) - { - fprintf (file, "\t! ----------------------------------------\n"); - fprintf (file, "\t! Guide linker to do " - "link time optimization: fp-as-gp\n"); - fprintf (file, "\t! We add one more instruction to " - "initialize $fp near to $gp location.\n"); - fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n"); - fprintf (file, "\t! this extra instruction should be " - "eliminated at link stage.\n"); - fprintf (file, "\t.omit_fp_begin\n"); - fprintf (file, "\tla\t$fp,_FP_BASE_\n"); - fprintf (file, "\t! ----------------------------------------\n"); - } } /* Before rtl epilogue has been expanded, this function is used. */ static void nds32_asm_function_begin_epilogue (FILE *file) { - /* If frame pointer is NOT needed and -mfp-as-gp is issued, - we can generate special directive: ".omit_fp_end" - to claim fp-as-gp optimization range. - However, for a naked function, - which means it should not have prologue/epilogue, - using fp-as-gp still requires saving $fp by push/pop behavior and - there is no benefit to use fp-as-gp on such small function. - So we need to make sure this function is NOT naked as well. */ - if (!frame_pointer_needed - && !cfun->machine->naked_p - && cfun->machine->fp_as_gp_p) - { - fprintf (file, "\t! ----------------------------------------\n"); - fprintf (file, "\t! Claim the range of fp-as-gp " - "link time optimization\n"); - fprintf (file, "\t.omit_fp_end\n"); - fprintf (file, "\t! ----------------------------------------\n"); - } - fprintf (file, "\t! 
BEGIN EPILOGUE\n"); } @@ -2200,6 +2236,26 @@ nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ? 1 : 0); + if (flag_pic) + { + fprintf (file, "\tsmw.adm\t$r31, [$r31], $r31, 4\n"); + fprintf (file, "\tsethi\t%s, hi20(_GLOBAL_OFFSET_TABLE_-8)\n", + reg_names [PIC_OFFSET_TABLE_REGNUM]); + fprintf (file, "\tori\t%s, %s, lo12(_GLOBAL_OFFSET_TABLE_-4)\n", + reg_names [PIC_OFFSET_TABLE_REGNUM], + reg_names [PIC_OFFSET_TABLE_REGNUM]); + + if (TARGET_ISA_V3) + fprintf (file, "\tadd5.pc\t$gp\n"); + else + { + fprintf (file, "\tmfusr\t$ta, $pc\n"); + fprintf (file, "\tadd\t%s, $ta, %s\n", + reg_names [PIC_OFFSET_TABLE_REGNUM], + reg_names [PIC_OFFSET_TABLE_REGNUM]); + } + } + if (delta != 0) { if (satisfies_constraint_Is15 (GEN_INT (delta))) @@ -2224,9 +2280,23 @@ nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, } } - fprintf (file, "\tb\t"); - assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); - fprintf (file, "\n"); + if (flag_pic) + { + fprintf (file, "\tla\t$ta, "); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "@PLT\n"); + fprintf (file, "\t! epilogue\n"); + fprintf (file, "\tlwi.bi\t%s, [%s], 4\n", + reg_names[PIC_OFFSET_TABLE_REGNUM], + reg_names[STACK_POINTER_REGNUM]); + fprintf (file, "\tbr\t$ta\n"); + } + else + { + fprintf (file, "\tb\t"); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "\n"); + } final_end_function (); } @@ -2242,15 +2312,20 @@ nds32_function_ok_for_sibcall (tree decl, /* 1. Do not apply sibling call if -mv3push is enabled, because pop25 instruction also represents return behavior. - 2. If this function is a variadic function, do not apply sibling call + 2. If this function is a isr function, do not apply sibling call + because it may perform the behavior that user does not expect. + 3. If this function is a variadic function, do not apply sibling call because the stack layout may be a mess. - 3. 
We don't want to apply sibling call optimization for indirect + 4. We don't want to apply sibling call optimization for indirect sibcall because the pop behavior in epilogue may pollute the content of caller-saved regsiter when the register is used for - indirect sibcall. */ + indirect sibcall. + 5. In pic mode, it may use some registers for PLT call. */ return (!TARGET_V3PUSH + && !nds32_isr_function_p (current_function_decl) && (cfun->machine->va_args_size == 0) - && decl); + && decl + && !flag_pic); } /* Determine whether we need to enable warning for function return check. */ @@ -2566,6 +2641,13 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) case SYMBOL_REF: /* (mem (symbol_ref A)) => [symbol_ref] */ + + if (flag_pic || SYMBOL_REF_TLS_MODEL (x)) + return false; + + if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x)) + return false; + /* If -mcmodel=large, the 'symbol_ref' is not a valid address during or after LRA/reload phase. */ if (TARGET_CMODEL_LARGE @@ -2577,7 +2659,8 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) the 'symbol_ref' is not a valid address during or after LRA/reload phase. */ if (TARGET_CMODEL_MEDIUM - && NDS32_SYMBOL_REF_RODATA_P (x) + && (NDS32_SYMBOL_REF_RODATA_P (x) + || CONSTANT_POOL_ADDRESS_P (x)) && (reload_completed || reload_in_progress || lra_in_progress)) @@ -2599,6 +2682,10 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) { /* Now we see the [ + const_addr ] pattern, but we need some further checking. */ + + if (flag_pic) + return false; + /* If -mcmodel=large, the 'const_addr' is not a valid address during or after LRA/reload phase. 
*/ if (TARGET_CMODEL_LARGE @@ -2675,17 +2762,207 @@ nds32_legitimate_address_p (machine_mode mode, rtx x, bool strict) case LO_SUM: /* (mem (lo_sum (reg) (symbol_ref))) */ - /* (mem (lo_sum (reg) (const))) */ - gcc_assert (REG_P (XEXP (x, 0))); - if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF - || GET_CODE (XEXP (x, 1)) == CONST) - return nds32_legitimate_address_p (mode, XEXP (x, 1), strict); - else + /* (mem (lo_sum (reg) (const (plus (symbol_ref) (reg))))) */ + /* TLS case: (mem (lo_sum (reg) (const (unspec symbol_ref X)))) */ + /* The LO_SUM is a valid address if and only if we would like to + generate 32-bit full address memory access with any of the following + circumstances: + 1. -mcmodel=large. + 2. -mcmodel=medium and the symbol_ref references to rodata. */ + { + rtx sym = NULL_RTX; + + if (flag_pic) + return false; + + if (!REG_P (XEXP (x, 0))) + return false; + + if (GET_CODE (XEXP (x, 1)) == SYMBOL_REF) + sym = XEXP (x, 1); + else if (GET_CODE (XEXP (x, 1)) == CONST) + { + rtx plus = XEXP (XEXP (x, 1), 0); + if (GET_CODE (plus) == PLUS) + sym = XEXP (plus, 0); + else if (GET_CODE (plus) == UNSPEC) + sym = XVECEXP (plus, 0, 0); + } + else + return false; + + /* SYM is still NULL_RTX when the CONST wraps neither PLUS nor UNSPEC; + assert on that instead of letting GET_CODE dereference it. */ + gcc_assert (sym != NULL_RTX && GET_CODE (sym) == SYMBOL_REF); + + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + return true; + + if (TARGET_CMODEL_LARGE) + return true; + else if (TARGET_CMODEL_MEDIUM + && NDS32_SYMBOL_REF_RODATA_P (sym)) + return true; + else + return false; + } + + default: + return false; + } +} + +/* Implement TARGET_LEGITIMIZE_ADDRESS: hand X to the TLS, PIC or ICT + legitimizer that applies, otherwise return it unchanged. */ +static rtx +nds32_legitimize_address (rtx x, + rtx oldx ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED) +{ + if (nds32_tls_referenced_p (x)) + x = nds32_legitimize_tls_address (x); + else if (flag_pic && SYMBOLIC_CONST_P (x)) + x = nds32_legitimize_pic_address (x); + else if (TARGET_ICT_MODEL_LARGE && nds32_indirect_call_referenced_p (x)) + x = nds32_legitimize_ict_address (x); + + return x; +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. */ +static bool +nds32_legitimate_constant_p (machine_mode mode, rtx x) +{ + switch
(GET_CODE (x)) + { + case CONST_DOUBLE: + if ((TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE) + && (mode == DFmode || mode == SFmode)) return false; + break; + case CONST: + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (!CONST_INT_P (XEXP (x, 1))) + return false; + x = XEXP (x, 0); + } + + if (GET_CODE (x) == UNSPEC) + { + switch (XINT (x, 1)) + { + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + case UNSPEC_ICT: + return false; + default: + return true; + } + } + break; + case SYMBOL_REF: + /* TLS symbols need a call to resolve in + precompute_register_parameters. */ + if (SYMBOL_REF_TLS_MODEL (x)) + return false; + break; + default: + return true; + } + + return true; +} +/* Implement TARGET_DELEGITIMIZE_ADDRESS. Reorganize the UNSPEC CONST and return its direct symbol. */ +static rtx +nds32_delegitimize_address (rtx x) +{ + x = delegitimize_mem_from_attrs (x); + + if (GET_CODE(x) == CONST) + { + rtx inner = XEXP (x, 0); + + /* Handle GOTOFF: look inside a PLUS for the UNSPEC. */ + if (GET_CODE (inner) == PLUS) + inner = XEXP (inner, 0); + + if (GET_CODE (inner) == UNSPEC) + { + switch (XINT (inner, 1)) + { + case UNSPEC_GOTINIT: + case UNSPEC_GOT: + case UNSPEC_GOTOFF: + case UNSPEC_PLT: + case UNSPEC_TLSGD: + case UNSPEC_TLSLD: + case UNSPEC_TLSIE: + case UNSPEC_TLSLE: + case UNSPEC_ICT: + x = XVECEXP (inner, 0, 0); + break; + default: + break; + } + } + } + return x; +} + +static machine_mode +nds32_vectorize_preferred_simd_mode (scalar_mode mode) +{ + if (!NDS32_EXT_DSP_P ()) + return word_mode; + + switch (mode) + { + case E_QImode: + return V4QImode; + case E_HImode: + return V2HImode; + default: + return word_mode; + } +} + +static bool +nds32_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + return !nds32_legitimate_constant_p (mode, x); + case SYMBOL_REF: + /* All symbols have to be accessed through gp-relative in PIC mode.
*/ + /* We don't want to force symbol as constant pool in .text section, + because we use the gp-relative instruction to load in small + or medium model. */ + if (flag_pic + || SYMBOL_REF_TLS_MODEL (x) + || TARGET_CMODEL_SMALL + || TARGET_CMODEL_MEDIUM) + return true; + break; + case CONST_INT: + case CONST_DOUBLE: + if (flag_pic && (lra_in_progress || reload_completed)) + return true; + break; default: return false; } + return false; } @@ -2731,13 +3003,33 @@ nds32_canonicalize_comparison (int *code, /* Describing Relative Costs of Operations. */ static int -nds32_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, +nds32_register_move_cost (machine_mode mode, reg_class_t from, reg_class_t to) { + /* On the Graywolf CPU, moves between FPR and GPR are cheaper than on other CPUs. */ + if (TARGET_PIPELINE_GRAYWOLF) + { + if (GET_MODE_SIZE (mode) == 8) + { + /* DPR to GPR. */ + if (from == FP_REGS && to != FP_REGS) + return 3; + /* GPR to DPR. */ + if (from != FP_REGS && to == FP_REGS) + return 2; + } + else + { + if ((from == FP_REGS && to != FP_REGS) + || (from != FP_REGS && to == FP_REGS)) + return 2; + } + } + if ((from == FP_REGS && to != FP_REGS) || (from != FP_REGS && to == FP_REGS)) - return 9; + return 3; else if (from == HIGH_REGS || to == HIGH_REGS) return optimize_size ? 6 : 2; else @@ -2825,6 +3117,9 @@ nds32_asm_file_start (void) { default_file_start (); + if (flag_pic) + fprintf (asm_out_file, "\t.pic\n"); + /* Tell assembler which ABI we are using. */ fprintf (asm_out_file, "\t! ABI version\n"); if (TARGET_HARD_FLOAT) @@ -2835,10 +3130,36 @@ nds32_asm_file_start (void) /* Tell assembler that this asm code is generated by compiler. */ fprintf (asm_out_file, "\t! This asm file is generated by compiler\n"); fprintf (asm_out_file, "\t.flag\tverbatim\n"); - /* Give assembler the size of each vector for interrupt handler. */ - fprintf (asm_out_file, "\t!
This vector size directive is required " - "for checking inconsistency on interrupt handler\n"); - fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); + + /* Insert directive for linker to distinguish object's ict flag. */ + if (!TARGET_LINUX_ABI) + { + if (TARGET_ICT_MODEL_LARGE) + fprintf (asm_out_file, "\t.ict_model\tlarge\n"); + else + fprintf (asm_out_file, "\t.ict_model\tsmall\n"); + } + + /* We need to provide the size of each vector for interrupt handler + under elf toolchain. */ + if (!TARGET_LINUX_ABI) + { + fprintf (asm_out_file, "\t! This vector size directive is required " + "for checking inconsistency on interrupt handler\n"); + fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); + } + + /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os, + the compiler may produce 'la $fp,_FP_BASE_' instruction + at prologue for fp-as-gp optimization. + We should emit weak reference of _FP_BASE_ to avoid undefined reference + in case user does not pass '--relax' option to linker. */ + if (!TARGET_LINUX_ABI && (TARGET_FORCE_FP_AS_GP || optimize_size)) + { + fprintf (asm_out_file, "\t! This weak reference is required to do " + "fp-as-gp link time optimization\n"); + fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n"); + } fprintf (asm_out_file, "\t! ------------------------------------\n"); @@ -2849,6 +3170,49 @@ nds32_asm_file_start (void) if (TARGET_ISA_V3M) fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3M"); + switch (nds32_cpu_option) + { + case CPU_N6: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N6"); + break; + + case CPU_N7: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N7"); + break; + + case CPU_N8: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N8"); + break; + + case CPU_E8: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "E8"); + break; + + case CPU_N9: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N9"); + break; + + case CPU_N10: + fprintf (asm_out_file, "\t! 
Pipeline model\t: %s\n", "N10"); + break; + + case CPU_GRAYWOLF: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "Graywolf"); + break; + + case CPU_N12: + case CPU_N13: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "N13"); + break; + + case CPU_SIMPLE: + fprintf (asm_out_file, "\t! Pipeline model\t: %s\n", "SIMPLE"); + break; + + default: + gcc_unreachable (); + } + if (TARGET_CMODEL_SMALL) fprintf (asm_out_file, "\t! Code model\t\t: %s\n", "SMALL"); if (TARGET_CMODEL_MEDIUM) @@ -2926,9 +3290,65 @@ nds32_asm_file_end (void) { nds32_asm_file_end_for_isr (); + /* The NDS32 Linux stack is mapped non-executable by default, so add a + .note.GNU-stack section. */ + if (TARGET_LINUX_ABI) + file_end_indicate_exec_stack (); + fprintf (asm_out_file, "\t! ------------------------------------\n"); } +static bool +nds32_asm_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC) + { + switch (XINT (x, 1)) + { + case UNSPEC_GOTINIT: + output_addr_const (file, XVECEXP (x, 0, 0)); + break; + case UNSPEC_GOTOFF: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTOFF", file); + break; + case UNSPEC_GOT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOT", file); + break; + case UNSPEC_PLT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@PLT", file); + break; + case UNSPEC_TLSGD: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TLSDESC", file); + break; + case UNSPEC_TLSLD: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TLSDESC", file); + break; + case UNSPEC_TLSIE: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@GOTTPOFF", file); + break; + case UNSPEC_TLSLE: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@TPOFF", file); + break; + case UNSPEC_ICT: + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs ("@ICT", file); + break; + default: + return false; + } + return true; + } + else + return false; +} + /* -- Output and Generation of Labels. 
*/ static void @@ -2944,13 +3364,15 @@ nds32_asm_globalize_label (FILE *stream, const char *name) static void nds32_print_operand (FILE *stream, rtx x, int code) { + HOST_WIDE_INT op_value = 0; HOST_WIDE_INT one_position; HOST_WIDE_INT zero_position; bool pick_lsb_p = false; bool pick_msb_p = false; int regno; - int op_value; + if (CONST_INT_P (x)) + op_value = INTVAL (x); switch (code) { @@ -2976,6 +3398,18 @@ nds32_print_operand (FILE *stream, rtx x, int code) /* No need to handle following process, so return immediately. */ return; + + case 'v': + gcc_assert (CONST_INT_P (x) + && (INTVAL (x) == 0 + || INTVAL (x) == 8 + || INTVAL (x) == 16 + || INTVAL (x) == 24)); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + + /* No need to handle following process, so return immediately. */ + return; + case 'B': /* Use exact_log2() to search the 1-bit position. */ gcc_assert (CONST_INT_P (x)); @@ -3003,7 +3437,6 @@ nds32_print_operand (FILE *stream, rtx x, int code) case 'V': /* 'x' is supposed to be CONST_INT, get the value. */ gcc_assert (CONST_INT_P (x)); - op_value = INTVAL (x); /* According to the Andes architecture, the system/user register index range is 0 ~ 1023. @@ -3083,8 +3516,15 @@ nds32_print_operand (FILE *stream, rtx x, int code) switch (GET_CODE (x)) { case LABEL_REF: + output_addr_const (stream, x); + break; + case SYMBOL_REF: output_addr_const (stream, x); + + if (!TARGET_LINUX_ABI && nds32_indirect_call_referenced_p (x)) + fprintf (stream, "@ICT"); + break; case REG: @@ -3167,6 +3607,17 @@ nds32_print_operand (FILE *stream, rtx x, int code) output_addr_const (stream, x); break; + case CONST_VECTOR: + fprintf (stream, HOST_WIDE_INT_PRINT_HEX, const_vector_to_hwint (x)); + break; + + case LO_SUM: + /* This is a special case for inline assembly using memory address 'p'. + The inline assembly code is expected to use pseudo instruction + for the operand.
EX: la */ + output_addr_const (stream, XEXP(x, 1)); + break; + default: /* Generally, output_addr_const () is able to handle most cases. We want to see what CODE could appear, @@ -3178,7 +3629,9 @@ nds32_print_operand (FILE *stream, rtx x, int code) } static void -nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) +nds32_print_operand_address (FILE *stream, + machine_mode mode ATTRIBUTE_UNUSED, + rtx x) { rtx op0, op1; @@ -3193,6 +3646,16 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) fputs ("]", stream); break; + case LO_SUM: + /* This is a special case for inline assembly using memory operand 'm'. + The inline assembly code is expected to use pseudo instruction + for the operand. EX: [ls].[bhw] */ + fputs ("[ + ", stream); + op1 = XEXP (x, 1); + output_addr_const (stream, op1); + fputs ("]", stream); + break; + case REG: /* Forbid using static chain register ($r16) on reduced-set registers configuration. */ @@ -3259,6 +3722,20 @@ nds32_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) reg_names[REGNO (XEXP (op0, 0))], sv); } + else if (GET_CODE (op0) == ASHIFT && REG_P (op1)) + { + /* [Ra + Rb << sv] + Normally, ASHIFT can be converted to MULT like the above case. + But when the address rtx does not go through canonicalize_address + defined in fwprop, we'll need this case. */ + int sv = INTVAL (XEXP (op0, 1)); + gcc_assert (sv <= 3 && sv >=0); + + fprintf (stream, "[%s + %s << %d]", + reg_names[REGNO (op1)], + reg_names[REGNO (XEXP (op0, 0))], + sv); + } else { /* The control flow is not supposed to be here. */ @@ -3453,6 +3930,27 @@ nds32_merge_decl_attributes (tree olddecl, tree newdecl) static void nds32_insert_attributes (tree decl, tree *attributes) { + /* An "indirect_call" function attribute implies "noinline" and "noclone" + for elf toolchain to support ROM patch mechanism.
*/ + if (TREE_CODE (decl) == FUNCTION_DECL + && lookup_attribute ("indirect_call", *attributes) != NULL) + { + tree new_attrs = *attributes; + + if (TARGET_LINUX_ABI) + error("cannot use indirect_call attribute under linux toolchain"); + + if (lookup_attribute ("noinline", new_attrs) == NULL) + new_attrs = tree_cons (get_identifier ("noinline"), NULL, new_attrs); + if (lookup_attribute ("noclone", new_attrs) == NULL) + new_attrs = tree_cons (get_identifier ("noclone"), NULL, new_attrs); + + if (!TREE_PUBLIC (decl)) + error("indirect_call attribute can't apply for static function"); + + *attributes = new_attrs; + } + /* For function declaration, we need to check isr-specific attributes: 1. Call nds32_check_isr_attrs_conflict() to check any conflict. 2. Check valid integer value for interrupt/exception. @@ -3478,6 +3976,38 @@ nds32_insert_attributes (tree decl, tree *attributes) excp = lookup_attribute ("exception", func_attrs); reset = lookup_attribute ("reset", func_attrs); + /* The following code may use attribute arguments. If there is no + argument from source code, it will cause segmentation fault. + Therefore, return directly and report error message later. */ + if ((intr && TREE_VALUE (intr) == NULL) + || (excp && TREE_VALUE (excp) == NULL) + || (reset && TREE_VALUE (reset) == NULL)) + return; + + /* ------------------------------------------------------------- */ + /* FIXME: + FOR BACKWARD COMPATIBILITY, we need to support the following patterns: + + __attribute__((interrupt("XXX;YYY;id=ZZZ"))) + __attribute__((exception("XXX;YYY;id=ZZZ"))) + __attribute__((reset("vectors=XXX;nmi_func=YYY;warm_func=ZZZ"))) + + If interrupt/exception/reset appears and its argument is a + STRING_CST, we will use other functions to parse string in the + nds32_construct_isr_vectors_information() and then set necessary + isr information in the nds32_isr_vectors[] array. Here we can + just return immediately to avoid new-syntax checking.
*/ + if (intr != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (intr))) == STRING_CST) + return; + if (excp != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (excp))) == STRING_CST) + return; + if (reset != NULL_TREE + && TREE_CODE (TREE_VALUE (TREE_VALUE (reset))) == STRING_CST) + return; + /* ------------------------------------------------------------- */ + if (intr || excp) { /* Deal with interrupt/exception. */ @@ -3597,7 +4127,9 @@ nds32_option_override (void) } if (TARGET_ISA_V3) { - /* Under V3 ISA, currently nothing should be strictly set. */ + /* If this is ARCH_V3J, we need to enable TARGET_REDUCED_REGS. */ + if (nds32_arch_option == ARCH_V3J) + target_flags |= MASK_REDUCED_REGS; } if (TARGET_ISA_V3M) { @@ -3609,6 +4141,9 @@ nds32_option_override (void) target_flags &= ~MASK_EXT_PERF2; /* Under V3M ISA, we need to strictly disable TARGET_EXT_STRING. */ target_flags &= ~MASK_EXT_STRING; + + if (flag_pic) + error ("not support -fpic option for v3m toolchain"); } /* See if we are using reduced-set registers: @@ -3626,6 +4161,12 @@ nds32_option_override (void) fixed_regs[r] = call_used_regs[r] = 1; } + /* See if user explicitly would like to use fp-as-gp optimization. + If so, we must prevent $fp from being allocated + during register allocation. */ + if (TARGET_FORCE_FP_AS_GP) + fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1; + if (!TARGET_16_BIT) { /* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH. */ @@ -3642,9 +4183,7 @@ nds32_option_override (void) "must be enable '-mext-fpu-sp' or '-mext-fpu-dp'"); } - /* Currently, we don't support PIC code generation yet. 
*/ - if (flag_pic) - sorry ("position-independent code not supported"); + nds32_init_rtx_costs (); nds32_register_passes (); } @@ -3658,8 +4197,11 @@ nds32_md_asm_adjust (vec &outputs ATTRIBUTE_UNUSED, vec &constraints ATTRIBUTE_UNUSED, vec &clobbers, HARD_REG_SET &clobbered_regs) { - clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM)); - SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM); + if (!flag_inline_asm_r15) + { + clobbers.safe_push (gen_rtx_REG (SImode, TA_REGNUM)); + SET_HARD_REG_BIT (clobbered_regs, TA_REGNUM); + } return NULL; } @@ -3686,6 +4228,13 @@ nds32_expand_builtin (tree exp, return nds32_expand_builtin_impl (exp, target, subtarget, mode, ignore); } +/* Implement TARGET_INIT_LIBFUNCS. */ +static void +nds32_init_libfuncs (void) +{ + if (TARGET_LINUX_ABI) + init_sync_libfuncs (UNITS_PER_WORD); +} /* ------------------------------------------------------------------------ */ @@ -3702,6 +4251,16 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile) builtin_define ("__nds32__"); builtin_define ("__NDS32__"); + /* We need to provide builtin macro to describe the size of + each vector for interrupt handler under elf toolchain. */ + if (!TARGET_LINUX_ABI) + { + if (TARGET_ISR_VECTOR_SIZE_4_BYTE) + builtin_define ("__NDS32_ISR_VECTOR_SIZE_4__"); + else + builtin_define ("__NDS32_ISR_VECTOR_SIZE_16__"); + } + if (TARGET_HARD_FLOAT) builtin_define ("__NDS32_ABI_2FP_PLUS__"); else @@ -3769,6 +4328,8 @@ nds32_cpu_cpp_builtins(struct cpp_reader *pfile) builtin_define ("__NDS32_GP_DIRECT__"); if (TARGET_VH) builtin_define ("__NDS32_VH__"); + if (NDS32_EXT_DSP_P ()) + builtin_define ("__NDS32_EXT_DSP__"); if (TARGET_BIG_ENDIAN) builtin_define ("__big_endian__"); @@ -4041,6 +4602,10 @@ nds32_expand_prologue (void) The result will be in cfun->machine. */ nds32_compute_stack_frame (); + /* Check frame_pointer_needed again to prevent fp is need after reload. 
*/ + if (frame_pointer_needed) + cfun->machine->fp_as_gp_p = false; + /* If this is a variadic function, first we need to push argument registers that hold the unnamed argument value. */ if (cfun->machine->va_args_size != 0) @@ -4065,7 +4630,7 @@ nds32_expand_prologue (void) /* If the function is 'naked', we do not have to generate prologue code fragment. */ - if (cfun->machine->naked_p) + if (cfun->machine->naked_p && !flag_pic) return; /* Get callee_first_regno and callee_last_regno. */ @@ -4194,9 +4759,15 @@ nds32_expand_prologue (void) -1 * sp_adjust); } - /* Prevent the instruction scheduler from - moving instructions across the boundary. */ - emit_insn (gen_blockage ()); + /* Emit gp setup instructions for -fpic. */ + if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + nds32_emit_load_gp (); + + /* If user applies -mno-sched-prolog-epilog option, + we need to prevent instructions of function body from being + scheduled with stack adjustment in prologue. */ + if (!flag_sched_prolog_epilog) + emit_insn (gen_blockage ()); } /* Function for normal multiple pop epilogue. */ @@ -4210,9 +4781,11 @@ nds32_expand_epilogue (bool sibcall_p) The result will be in cfun->machine. */ nds32_compute_stack_frame (); - /* Prevent the instruction scheduler from - moving instructions across the boundary. */ - emit_insn (gen_blockage ()); + /* If user applies -mno-sched-prolog-epilog option, + we need to prevent instructions of function body from being + scheduled with stack adjustment in epilogue. */ + if (!flag_sched_prolog_epilog) + emit_insn (gen_blockage ()); /* If the function is 'naked', we do not have to generate epilogue code fragment BUT 'ret' instruction. @@ -4238,7 +4811,16 @@ nds32_expand_epilogue (bool sibcall_p) /* Generate return instruction by using 'return_internal' pattern. Make sure this instruction is after gen_blockage(). 
*/ if (!sibcall_p) - emit_jump_insn (gen_return_internal ()); + { + /* We need to further check attributes to determine whether + there should be return instruction at epilogue. + If the attribute naked exists but -mno-ret-in-naked-func + is issued, there is NO need to generate return instruction. */ + if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) + return; + + emit_jump_insn (gen_return_internal ()); + } return; } @@ -4435,9 +5017,13 @@ nds32_expand_prologue_v3push (void) if (cfun->machine->callee_saved_gpr_regs_size > 0) df_set_regs_ever_live (FP_REGNUM, 1); + /* Check frame_pointer_needed again to prevent fp is need after reload. */ + if (frame_pointer_needed) + cfun->machine->fp_as_gp_p = false; + /* If the function is 'naked', we do not have to generate prologue code fragment. */ - if (cfun->machine->naked_p) + if (cfun->machine->naked_p && !flag_pic) return; /* Get callee_first_regno and callee_last_regno. */ @@ -4565,6 +5151,10 @@ nds32_expand_prologue_v3push (void) -1 * sp_adjust); } + /* Emit gp setup instructions for -fpic. */ + if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + nds32_emit_load_gp (); + /* Prevent the instruction scheduler from moving instructions across the boundary. */ emit_insn (gen_blockage ()); @@ -4590,9 +5180,19 @@ nds32_expand_epilogue_v3pop (bool sibcall_p) if (cfun->machine->naked_p) { /* Generate return instruction by using 'return_internal' pattern. - Make sure this instruction is after gen_blockage(). */ + Make sure this instruction is after gen_blockage(). + First we need to check this is a function without sibling call. */ if (!sibcall_p) - emit_jump_insn (gen_return_internal ()); + { + /* We need to further check attributes to determine whether + there should be return instruction at epilogue. + If the attribute naked exists but -mno-ret-in-naked-func + is issued, there is NO need to generate return instruction. 
*/ + if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) + return; + + emit_jump_insn (gen_return_internal ()); + } return; } @@ -4756,6 +5356,11 @@ nds32_can_use_return_insn (void) if (!reload_completed) return 0; + /* If attribute 'naked' appears but -mno-ret-in-naked-func is used, + we cannot use return instruction. */ + if (cfun->machine->attr_naked_p && !flag_ret_in_naked_func) + return 0; + sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size + cfun->machine->callee_saved_area_gpr_padding_bytes @@ -5009,6 +5614,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode, #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P nds32_vector_mode_supported_p + /* -- How Scalar Function Values Are Returned. */ #undef TARGET_FUNCTION_VALUE @@ -5086,6 +5694,21 @@ nds32_use_blocks_for_constant_p (machine_mode mode, #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS nds32_legitimize_address + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P nds32_legitimate_constant_p + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE nds32_vectorize_preferred_simd_mode + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM nds32_cannot_force_const_mem + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS nds32_delegitimize_address + /* Anchored Addresses. */ @@ -5146,6 +5769,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode, #undef TARGET_ASM_ALIGNED_SI_OP #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nds32_asm_output_addr_const_extra + /* -- Output of Uninitialized Variables. */ /* -- Output and Generation of Labels. 
*/ @@ -5215,6 +5841,9 @@ nds32_use_blocks_for_constant_p (machine_mode mode, /* Emulating TLS. */ +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS TARGET_LINUX_ABI + /* Defining coprocessor specifics for MIPS targets. */ @@ -5242,6 +5871,8 @@ nds32_use_blocks_for_constant_p (machine_mode mode, #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN nds32_expand_builtin +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS nds32_init_libfuncs #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P #define TARGET_USE_BLOCKS_FOR_CONSTANT_P nds32_use_blocks_for_constant_p diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h index 29edccd..e3ceb63 100644 --- a/gcc/config/nds32/nds32.h +++ b/gcc/config/nds32/nds32.h @@ -36,6 +36,16 @@ #define NDS32_SYMBOL_REF_RODATA_P(x) \ ((SYMBOL_REF_FLAGS (x) & NDS32_SYMBOL_FLAG_RODATA) != 0) +enum nds32_relax_insn_type +{ + RELAX_ORI, + RELAX_PLT_ADD, + RELAX_TLS_ADD_or_LW, + RELAX_TLS_ADD_LW, + RELAX_TLS_LW_JRAL, + RELAX_DONE +}; + /* Classifies expand result for expand helper function. */ enum nds32_expand_result_type { @@ -140,6 +150,9 @@ enum nds32_16bit_address_type Check gcc/common/config/nds32/nds32-common.c for the optimizations that apply -malways-align. */ #define NDS32_ALIGN_P() (TARGET_ALWAYS_ALIGN) + +#define NDS32_EXT_DSP_P() (TARGET_EXT_DSP && !TARGET_FORCE_NO_EXT_DSP) + /* Get alignment according to mode or type information. When 'type' is nonnull, there is no need to look at 'mode'. */ #define NDS32_MODE_TYPE_ALIGN(mode, type) \ @@ -305,6 +318,10 @@ struct GTY(()) machine_function 2. The rtl lowering and optimization are close to target code. For this case we need address to be strictly aligned. */ int strict_aligned_p; + + /* Record two similar attributes status. */ + int attr_naked_p; + int attr_no_prologue_p; }; /* A C structure that contains the arguments information. 
*/ @@ -350,7 +367,8 @@ enum nds32_isr_nested_type { NDS32_NESTED, NDS32_NOT_NESTED, - NDS32_NESTED_READY + NDS32_NESTED_READY, + NDS32_CRITICAL }; /* Define structure to record isr information. @@ -378,6 +396,13 @@ struct nds32_isr_info unless user specifies attribute to change it. */ enum nds32_isr_nested_type nested_type; + /* Secure isr level. + Currently we have 0-3 security level. + It should be set to 0 by default. + For security processors, this is determined by secure + attribute or compiler options. */ + unsigned int security_level; + /* Total vectors. The total vectors = interrupt + exception numbers + reset. It should be set to 0 by default. @@ -439,7 +464,30 @@ enum nds32_builtins NDS32_BUILTIN_FFB, NDS32_BUILTIN_FFMISM, NDS32_BUILTIN_FLMISM, - + NDS32_BUILTIN_KADDW, + NDS32_BUILTIN_KSUBW, + NDS32_BUILTIN_KADDH, + NDS32_BUILTIN_KSUBH, + NDS32_BUILTIN_KDMBB, + NDS32_BUILTIN_V_KDMBB, + NDS32_BUILTIN_KDMBT, + NDS32_BUILTIN_V_KDMBT, + NDS32_BUILTIN_KDMTB, + NDS32_BUILTIN_V_KDMTB, + NDS32_BUILTIN_KDMTT, + NDS32_BUILTIN_V_KDMTT, + NDS32_BUILTIN_KHMBB, + NDS32_BUILTIN_V_KHMBB, + NDS32_BUILTIN_KHMBT, + NDS32_BUILTIN_V_KHMBT, + NDS32_BUILTIN_KHMTB, + NDS32_BUILTIN_V_KHMTB, + NDS32_BUILTIN_KHMTT, + NDS32_BUILTIN_V_KHMTT, + NDS32_BUILTIN_KSLRAW, + NDS32_BUILTIN_KSLRAW_U, + NDS32_BUILTIN_RDOV, + NDS32_BUILTIN_CLROV, NDS32_BUILTIN_ROTR, NDS32_BUILTIN_SVA, NDS32_BUILTIN_SVS, @@ -512,7 +560,295 @@ enum nds32_builtins NDS32_BUILTIN_SET_TRIG_LEVEL, NDS32_BUILTIN_SET_TRIG_EDGE, NDS32_BUILTIN_GET_TRIG_TYPE, - + NDS32_BUILTIN_DSP_BEGIN, + NDS32_BUILTIN_ADD16, + NDS32_BUILTIN_V_UADD16, + NDS32_BUILTIN_V_SADD16, + NDS32_BUILTIN_RADD16, + NDS32_BUILTIN_V_RADD16, + NDS32_BUILTIN_URADD16, + NDS32_BUILTIN_V_URADD16, + NDS32_BUILTIN_KADD16, + NDS32_BUILTIN_V_KADD16, + NDS32_BUILTIN_UKADD16, + NDS32_BUILTIN_V_UKADD16, + NDS32_BUILTIN_SUB16, + NDS32_BUILTIN_V_USUB16, + NDS32_BUILTIN_V_SSUB16, + NDS32_BUILTIN_RSUB16, + NDS32_BUILTIN_V_RSUB16, + NDS32_BUILTIN_URSUB16, + 
NDS32_BUILTIN_V_URSUB16, + NDS32_BUILTIN_KSUB16, + NDS32_BUILTIN_V_KSUB16, + NDS32_BUILTIN_UKSUB16, + NDS32_BUILTIN_V_UKSUB16, + NDS32_BUILTIN_CRAS16, + NDS32_BUILTIN_V_UCRAS16, + NDS32_BUILTIN_V_SCRAS16, + NDS32_BUILTIN_RCRAS16, + NDS32_BUILTIN_V_RCRAS16, + NDS32_BUILTIN_URCRAS16, + NDS32_BUILTIN_V_URCRAS16, + NDS32_BUILTIN_KCRAS16, + NDS32_BUILTIN_V_KCRAS16, + NDS32_BUILTIN_UKCRAS16, + NDS32_BUILTIN_V_UKCRAS16, + NDS32_BUILTIN_CRSA16, + NDS32_BUILTIN_V_UCRSA16, + NDS32_BUILTIN_V_SCRSA16, + NDS32_BUILTIN_RCRSA16, + NDS32_BUILTIN_V_RCRSA16, + NDS32_BUILTIN_URCRSA16, + NDS32_BUILTIN_V_URCRSA16, + NDS32_BUILTIN_KCRSA16, + NDS32_BUILTIN_V_KCRSA16, + NDS32_BUILTIN_UKCRSA16, + NDS32_BUILTIN_V_UKCRSA16, + NDS32_BUILTIN_ADD8, + NDS32_BUILTIN_V_UADD8, + NDS32_BUILTIN_V_SADD8, + NDS32_BUILTIN_RADD8, + NDS32_BUILTIN_V_RADD8, + NDS32_BUILTIN_URADD8, + NDS32_BUILTIN_V_URADD8, + NDS32_BUILTIN_KADD8, + NDS32_BUILTIN_V_KADD8, + NDS32_BUILTIN_UKADD8, + NDS32_BUILTIN_V_UKADD8, + NDS32_BUILTIN_SUB8, + NDS32_BUILTIN_V_USUB8, + NDS32_BUILTIN_V_SSUB8, + NDS32_BUILTIN_RSUB8, + NDS32_BUILTIN_V_RSUB8, + NDS32_BUILTIN_URSUB8, + NDS32_BUILTIN_V_URSUB8, + NDS32_BUILTIN_KSUB8, + NDS32_BUILTIN_V_KSUB8, + NDS32_BUILTIN_UKSUB8, + NDS32_BUILTIN_V_UKSUB8, + NDS32_BUILTIN_SRA16, + NDS32_BUILTIN_V_SRA16, + NDS32_BUILTIN_SRA16_U, + NDS32_BUILTIN_V_SRA16_U, + NDS32_BUILTIN_SRL16, + NDS32_BUILTIN_V_SRL16, + NDS32_BUILTIN_SRL16_U, + NDS32_BUILTIN_V_SRL16_U, + NDS32_BUILTIN_SLL16, + NDS32_BUILTIN_V_SLL16, + NDS32_BUILTIN_KSLL16, + NDS32_BUILTIN_V_KSLL16, + NDS32_BUILTIN_KSLRA16, + NDS32_BUILTIN_V_KSLRA16, + NDS32_BUILTIN_KSLRA16_U, + NDS32_BUILTIN_V_KSLRA16_U, + NDS32_BUILTIN_CMPEQ16, + NDS32_BUILTIN_V_SCMPEQ16, + NDS32_BUILTIN_V_UCMPEQ16, + NDS32_BUILTIN_SCMPLT16, + NDS32_BUILTIN_V_SCMPLT16, + NDS32_BUILTIN_SCMPLE16, + NDS32_BUILTIN_V_SCMPLE16, + NDS32_BUILTIN_UCMPLT16, + NDS32_BUILTIN_V_UCMPLT16, + NDS32_BUILTIN_UCMPLE16, + NDS32_BUILTIN_V_UCMPLE16, + NDS32_BUILTIN_CMPEQ8, + NDS32_BUILTIN_V_SCMPEQ8, + 
NDS32_BUILTIN_V_UCMPEQ8, + NDS32_BUILTIN_SCMPLT8, + NDS32_BUILTIN_V_SCMPLT8, + NDS32_BUILTIN_SCMPLE8, + NDS32_BUILTIN_V_SCMPLE8, + NDS32_BUILTIN_UCMPLT8, + NDS32_BUILTIN_V_UCMPLT8, + NDS32_BUILTIN_UCMPLE8, + NDS32_BUILTIN_V_UCMPLE8, + NDS32_BUILTIN_SMIN16, + NDS32_BUILTIN_V_SMIN16, + NDS32_BUILTIN_UMIN16, + NDS32_BUILTIN_V_UMIN16, + NDS32_BUILTIN_SMAX16, + NDS32_BUILTIN_V_SMAX16, + NDS32_BUILTIN_UMAX16, + NDS32_BUILTIN_V_UMAX16, + NDS32_BUILTIN_SCLIP16, + NDS32_BUILTIN_V_SCLIP16, + NDS32_BUILTIN_UCLIP16, + NDS32_BUILTIN_V_UCLIP16, + NDS32_BUILTIN_KHM16, + NDS32_BUILTIN_V_KHM16, + NDS32_BUILTIN_KHMX16, + NDS32_BUILTIN_V_KHMX16, + NDS32_BUILTIN_KABS16, + NDS32_BUILTIN_V_KABS16, + NDS32_BUILTIN_SMIN8, + NDS32_BUILTIN_V_SMIN8, + NDS32_BUILTIN_UMIN8, + NDS32_BUILTIN_V_UMIN8, + NDS32_BUILTIN_SMAX8, + NDS32_BUILTIN_V_SMAX8, + NDS32_BUILTIN_UMAX8, + NDS32_BUILTIN_V_UMAX8, + NDS32_BUILTIN_KABS8, + NDS32_BUILTIN_V_KABS8, + NDS32_BUILTIN_SUNPKD810, + NDS32_BUILTIN_V_SUNPKD810, + NDS32_BUILTIN_SUNPKD820, + NDS32_BUILTIN_V_SUNPKD820, + NDS32_BUILTIN_SUNPKD830, + NDS32_BUILTIN_V_SUNPKD830, + NDS32_BUILTIN_SUNPKD831, + NDS32_BUILTIN_V_SUNPKD831, + NDS32_BUILTIN_ZUNPKD810, + NDS32_BUILTIN_V_ZUNPKD810, + NDS32_BUILTIN_ZUNPKD820, + NDS32_BUILTIN_V_ZUNPKD820, + NDS32_BUILTIN_ZUNPKD830, + NDS32_BUILTIN_V_ZUNPKD830, + NDS32_BUILTIN_ZUNPKD831, + NDS32_BUILTIN_V_ZUNPKD831, + NDS32_BUILTIN_RADDW, + NDS32_BUILTIN_URADDW, + NDS32_BUILTIN_RSUBW, + NDS32_BUILTIN_URSUBW, + NDS32_BUILTIN_SRA_U, + NDS32_BUILTIN_KSLL, + NDS32_BUILTIN_PKBB16, + NDS32_BUILTIN_V_PKBB16, + NDS32_BUILTIN_PKBT16, + NDS32_BUILTIN_V_PKBT16, + NDS32_BUILTIN_PKTB16, + NDS32_BUILTIN_V_PKTB16, + NDS32_BUILTIN_PKTT16, + NDS32_BUILTIN_V_PKTT16, + NDS32_BUILTIN_SMMUL, + NDS32_BUILTIN_SMMUL_U, + NDS32_BUILTIN_KMMAC, + NDS32_BUILTIN_KMMAC_U, + NDS32_BUILTIN_KMMSB, + NDS32_BUILTIN_KMMSB_U, + NDS32_BUILTIN_KWMMUL, + NDS32_BUILTIN_KWMMUL_U, + NDS32_BUILTIN_SMMWB, + NDS32_BUILTIN_V_SMMWB, + NDS32_BUILTIN_SMMWB_U, + 
NDS32_BUILTIN_V_SMMWB_U, + NDS32_BUILTIN_SMMWT, + NDS32_BUILTIN_V_SMMWT, + NDS32_BUILTIN_SMMWT_U, + NDS32_BUILTIN_V_SMMWT_U, + NDS32_BUILTIN_KMMAWB, + NDS32_BUILTIN_V_KMMAWB, + NDS32_BUILTIN_KMMAWB_U, + NDS32_BUILTIN_V_KMMAWB_U, + NDS32_BUILTIN_KMMAWT, + NDS32_BUILTIN_V_KMMAWT, + NDS32_BUILTIN_KMMAWT_U, + NDS32_BUILTIN_V_KMMAWT_U, + NDS32_BUILTIN_SMBB, + NDS32_BUILTIN_V_SMBB, + NDS32_BUILTIN_SMBT, + NDS32_BUILTIN_V_SMBT, + NDS32_BUILTIN_SMTT, + NDS32_BUILTIN_V_SMTT, + NDS32_BUILTIN_KMDA, + NDS32_BUILTIN_V_KMDA, + NDS32_BUILTIN_KMXDA, + NDS32_BUILTIN_V_KMXDA, + NDS32_BUILTIN_SMDS, + NDS32_BUILTIN_V_SMDS, + NDS32_BUILTIN_SMDRS, + NDS32_BUILTIN_V_SMDRS, + NDS32_BUILTIN_SMXDS, + NDS32_BUILTIN_V_SMXDS, + NDS32_BUILTIN_KMABB, + NDS32_BUILTIN_V_KMABB, + NDS32_BUILTIN_KMABT, + NDS32_BUILTIN_V_KMABT, + NDS32_BUILTIN_KMATT, + NDS32_BUILTIN_V_KMATT, + NDS32_BUILTIN_KMADA, + NDS32_BUILTIN_V_KMADA, + NDS32_BUILTIN_KMAXDA, + NDS32_BUILTIN_V_KMAXDA, + NDS32_BUILTIN_KMADS, + NDS32_BUILTIN_V_KMADS, + NDS32_BUILTIN_KMADRS, + NDS32_BUILTIN_V_KMADRS, + NDS32_BUILTIN_KMAXDS, + NDS32_BUILTIN_V_KMAXDS, + NDS32_BUILTIN_KMSDA, + NDS32_BUILTIN_V_KMSDA, + NDS32_BUILTIN_KMSXDA, + NDS32_BUILTIN_V_KMSXDA, + NDS32_BUILTIN_SMAL, + NDS32_BUILTIN_V_SMAL, + NDS32_BUILTIN_BITREV, + NDS32_BUILTIN_WEXT, + NDS32_BUILTIN_BPICK, + NDS32_BUILTIN_INSB, + NDS32_BUILTIN_SADD64, + NDS32_BUILTIN_UADD64, + NDS32_BUILTIN_RADD64, + NDS32_BUILTIN_URADD64, + NDS32_BUILTIN_KADD64, + NDS32_BUILTIN_UKADD64, + NDS32_BUILTIN_SSUB64, + NDS32_BUILTIN_USUB64, + NDS32_BUILTIN_RSUB64, + NDS32_BUILTIN_URSUB64, + NDS32_BUILTIN_KSUB64, + NDS32_BUILTIN_UKSUB64, + NDS32_BUILTIN_SMAR64, + NDS32_BUILTIN_SMSR64, + NDS32_BUILTIN_UMAR64, + NDS32_BUILTIN_UMSR64, + NDS32_BUILTIN_KMAR64, + NDS32_BUILTIN_KMSR64, + NDS32_BUILTIN_UKMAR64, + NDS32_BUILTIN_UKMSR64, + NDS32_BUILTIN_SMALBB, + NDS32_BUILTIN_V_SMALBB, + NDS32_BUILTIN_SMALBT, + NDS32_BUILTIN_V_SMALBT, + NDS32_BUILTIN_SMALTT, + NDS32_BUILTIN_V_SMALTT, + NDS32_BUILTIN_SMALDA, + 
NDS32_BUILTIN_V_SMALDA, + NDS32_BUILTIN_SMALXDA, + NDS32_BUILTIN_V_SMALXDA, + NDS32_BUILTIN_SMALDS, + NDS32_BUILTIN_V_SMALDS, + NDS32_BUILTIN_SMALDRS, + NDS32_BUILTIN_V_SMALDRS, + NDS32_BUILTIN_SMALXDS, + NDS32_BUILTIN_V_SMALXDS, + NDS32_BUILTIN_SMUL16, + NDS32_BUILTIN_V_SMUL16, + NDS32_BUILTIN_SMULX16, + NDS32_BUILTIN_V_SMULX16, + NDS32_BUILTIN_UMUL16, + NDS32_BUILTIN_V_UMUL16, + NDS32_BUILTIN_UMULX16, + NDS32_BUILTIN_V_UMULX16, + NDS32_BUILTIN_SMSLDA, + NDS32_BUILTIN_V_SMSLDA, + NDS32_BUILTIN_SMSLXDA, + NDS32_BUILTIN_V_SMSLXDA, + NDS32_BUILTIN_UCLIP32, + NDS32_BUILTIN_SCLIP32, + NDS32_BUILTIN_KABS, + NDS32_BUILTIN_UALOAD_U16, + NDS32_BUILTIN_UALOAD_S16, + NDS32_BUILTIN_UALOAD_U8, + NDS32_BUILTIN_UALOAD_S8, + NDS32_BUILTIN_UASTORE_U16, + NDS32_BUILTIN_UASTORE_S16, + NDS32_BUILTIN_UASTORE_U8, + NDS32_BUILTIN_UASTORE_S8, + NDS32_BUILTIN_DSP_END, NDS32_BUILTIN_UNALIGNED_FEATURE, NDS32_BUILTIN_ENABLE_UNALIGNED, NDS32_BUILTIN_DISABLE_UNALIGNED, @@ -521,16 +857,30 @@ enum nds32_builtins /* ------------------------------------------------------------------------ */ -#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) +#define TARGET_ISR_VECTOR_SIZE_4_BYTE \ + (nds32_isr_vector_size == 4) +#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) #define TARGET_ISA_V3 \ (nds32_arch_option == ARCH_V3 \ + || nds32_arch_option == ARCH_V3J \ || nds32_arch_option == ARCH_V3F \ || nds32_arch_option == ARCH_V3S) #define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) +#define TARGET_PIPELINE_N7 \ + (nds32_cpu_option == CPU_N7) +#define TARGET_PIPELINE_N8 \ + (nds32_cpu_option == CPU_N6 \ + || nds32_cpu_option == CPU_N8) #define TARGET_PIPELINE_N9 \ (nds32_cpu_option == CPU_N9) +#define TARGET_PIPELINE_N10 \ + (nds32_cpu_option == CPU_N10) +#define TARGET_PIPELINE_N13 \ + (nds32_cpu_option == CPU_N12 || nds32_cpu_option == CPU_N13) +#define TARGET_PIPELINE_GRAYWOLF \ + (nds32_cpu_option == CPU_GRAYWOLF) #define TARGET_PIPELINE_SIMPLE \ (nds32_cpu_option == CPU_SIMPLE) @@ -541,6 
+891,12 @@ enum nds32_builtins #define TARGET_CMODEL_LARGE \ (nds32_cmodel_option == CMODEL_LARGE) +#define TARGET_ICT_MODEL_SMALL \ + (nds32_ict_model == ICT_MODEL_SMALL) + +#define TARGET_ICT_MODEL_LARGE \ + (nds32_ict_model == ICT_MODEL_LARGE) + /* When -mcmodel=small or -mcmodel=medium, compiler may generate gp-base instruction directly. */ #define TARGET_GP_DIRECT \ @@ -576,6 +932,21 @@ enum nds32_builtins #endif #define TARGET_CONFIG_FPU_DEFAULT NDS32_CONFIG_FPU_2 + +/* ------------------------------------------------------------------------ */ + +#ifdef TARGET_DEFAULT_RELAX +# define NDS32_RELAX_SPEC " %{!mno-relax:--relax}" +#else +# define NDS32_RELAX_SPEC " %{mrelax:--relax}" +#endif + +#ifdef TARGET_DEFAULT_EXT_DSP +# define NDS32_EXT_DSP_SPEC " %{!mno-ext-dsp:-mext-dsp}" +#else +# define NDS32_EXT_DSP_SPEC "" +#endif + /* ------------------------------------------------------------------------ */ /* Controlling the Compilation Driver. */ @@ -591,11 +962,15 @@ enum nds32_builtins {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" } #define CC1_SPEC \ - "" + NDS32_EXT_DSP_SPEC #define ASM_SPEC \ " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ " %{march=*:-march=%*}" \ + " %{mno-16-bit|mno-16bit:-mno-16bit-ext}" \ + " %{march=v3m:%{!mfull-regs:%{!mreduced-regs:-mreduced-regs}}}" \ + " %{mfull-regs:-mno-reduced-regs}" \ + " %{mreduced-regs:-mreduced-regs}" \ " %{mabi=*:-mabi=v%*}" \ " %{mconfig-fpu=*:-mfpu-freg=%*}" \ " %{mext-fpu-mac:-mmac}" \ @@ -603,35 +978,9 @@ enum nds32_builtins " %{mext-fpu-sp:-mfpu-sp-ext}" \ " %{mno-ext-fpu-sp:-mno-fpu-sp-ext}" \ " %{mext-fpu-dp:-mfpu-dp-ext}" \ - " %{mno-ext-fpu-sp:-mno-fpu-dp-ext}" - -/* If user issues -mrelax, we need to pass '--relax' to linker. */ -#define LINK_SPEC \ - " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ - " %{mrelax:--relax}" - -#define LIB_SPEC \ - " -lc -lgloss" - -/* The option -mno-ctor-dtor can disable constructor/destructor feature - by applying different crt stuff. 
In the convention, crt0.o is the - startup file without constructor/destructor; - crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the - startup files with constructor/destructor. - Note that crt0.o, crt1.o, crti.o, and crtn.o are provided - by newlib/mculib/glibc/ublic, while crtbegin.o and crtend.o are - currently provided by GCC for nds32 target. - - For nds32 target so far: - If -mno-ctor-dtor, we are going to link - "crt0.o [user objects]". - If general cases, we are going to link - "crt1.o crtbegin1.o [user objects] crtend1.o". */ -#define STARTFILE_SPEC \ - " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ - " %{!mno-ctor-dtor:crtbegin1.o%s}" -#define ENDFILE_SPEC \ - " %{!mno-ctor-dtor:crtend1.o%s}" + " %{mno-ext-fpu-dp:-mno-fpu-dp-ext}" \ + " %{mext-dsp:-mdsp-ext}" \ + " %{O|O1|O2|O3|Ofast:-O1;:-Os}" /* The TARGET_BIG_ENDIAN_DEFAULT is defined if we configure gcc with --target=nds32be-* setting. @@ -642,9 +991,11 @@ enum nds32_builtins # define NDS32_ENDIAN_DEFAULT "mlittle-endian" #endif -/* Currently we only have elf toolchain, - where -mcmodel=medium is always the default. */ -#define NDS32_CMODEL_DEFAULT "mcmodel=medium" +#if TARGET_ELF +# define NDS32_CMODEL_DEFAULT "mcmodel=medium" +#else +# define NDS32_CMODEL_DEFAULT "mcmodel=large" +#endif #define MULTILIB_DEFAULTS \ { NDS32_ENDIAN_DEFAULT, NDS32_CMODEL_DEFAULT } @@ -1139,12 +1490,17 @@ enum reg_class #define PIC_OFFSET_TABLE_REGNUM GP_REGNUM +#define SYMBOLIC_CONST_P(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + /* Defining the Output Assembler Language. */ #define ASM_COMMENT_START "!" -#define ASM_APP_ON "! #APP" +#define ASM_APP_ON "! #APP\n" #define ASM_APP_OFF "! 
#NO_APP\n" diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index 3b8107e..f5349d7 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -56,24 +56,29 @@ ;; ------------------------------------------------------------------------ ;; CPU pipeline model. -(define_attr "pipeline_model" "n7,n8,e8,n9,simple" +(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,simple" (const (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") + (match_test "nds32_cpu_option == CPU_N10") (const_string "n10") + (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf") + (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") + (match_test "nds32_cpu_option == CPU_N13") (const_string "n13") (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] (const_string "n9")))) ;; Insn type, it is used to default other attribute values. (define_attr "type" "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\ - falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore" + falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\ + dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext" (const_string "unknown")) ;; Insn sub-type (define_attr "subtype" - "simple,shift" + "simple,shift,saturation" (const_string "simple")) ;; Length, in bytes, default is 4-bytes. @@ -133,6 +138,7 @@ ;; ---------------------------------------------------------------------------- +(include "nds32-dspext.md") ;; Move instructions. 
@@ -209,6 +215,27 @@ low12_int)); DONE; } + + if ((REG_P (operands[0]) || GET_CODE (operands[0]) == SUBREG) + && SYMBOLIC_CONST_P (operands[1])) + { + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (operands[1])) + { + nds32_expand_ict_move (operands); + DONE; + } + else if (nds32_tls_referenced_p (operands [1])) + { + nds32_expand_tls_move (operands); + DONE; + } + else if (flag_pic) + { + nds32_expand_pic_move (operands); + DONE; + } + } }) (define_insn "*mov" @@ -271,8 +298,8 @@ ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF ;; are able to match such instruction template. (define_insn "move_addr" - [(set (match_operand:SI 0 "register_operand" "=l, r") - (match_operand:SI 1 "nds32_symbolic_operand" " i, i"))] + [(set (match_operand:SI 0 "nds32_general_register_operand" "=l, r") + (match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))] "" "la\t%0, %1" [(set_attr "type" "alu") @@ -351,13 +378,58 @@ ;; ---------------------------------------------------------------------------- +(define_expand "extv" + [(set (match_operand 0 "register_operand" "") + (sign_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ + enum nds32_expand_result_type result = nds32_expand_extv (operands); + switch (result) + { + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); + } +}) + +(define_expand "insv" + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) + (match_operand 3 "register_operand" ""))] + "" +{ + enum nds32_expand_result_type result = nds32_expand_insv (operands); + switch (result) + { + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); + 
} +}) ;; Arithmetic instructions. (define_insn "addsi3" [(set (match_operand:SI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") (plus:SI (match_operand:SI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") - (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,Iu06, Is15, r")))] + (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))] "" { switch (which_alternative) @@ -1428,11 +1500,30 @@ (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] "" - "" + { + rtx insn; + rtx sym = XEXP (operands[0], 0); + + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[0] = gen_const_mem (Pmode, reg); + } + + if (flag_pic) + { + insn = emit_call_insn (gen_call_internal + (XEXP (operands[0], 0), GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + DONE; + } + } ) (define_insn "call_internal" - [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i")) + [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S")) (match_operand 1)) (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] @@ -1474,9 +1565,11 @@ (const_int 2) (const_int 4)) ;; Alternative 1 - (if_then_else (match_test "nds32_long_call_p (operands[0])") - (const_int 12) - (const_int 4)) + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "nds32_long_call_p (operands[0])") + (const_int 12) + (const_int 4))) ])] ) @@ -1492,11 +1585,33 @@ (match_operand 2))) (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] - "") + "" + { + rtx insn; + rtx sym = XEXP (operands[1], 0); + + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[1] = gen_const_mem (Pmode, reg); + } + + if (flag_pic) + { + insn = + emit_call_insn 
(gen_call_value_internal + (operands[0], XEXP (operands[1], 0), GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + DONE; + } + } +) (define_insn "call_value_internal" [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i")) + (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S")) (match_operand 2))) (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] @@ -1538,9 +1653,11 @@ (const_int 2) (const_int 4)) ;; Alternative 1 - (if_then_else (match_test "nds32_long_call_p (operands[1])") - (const_int 12) - (const_int 4)) + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "nds32_long_call_p (operands[1])") + (const_int 12) + (const_int 4))) ])] ) @@ -1583,10 +1700,21 @@ (const_int 0)) (clobber (reg:SI TA_REGNUM)) (return)])] - "") + "" +{ + rtx sym = XEXP (operands[0], 0); + + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[0] = gen_const_mem (Pmode, reg); + } +}) (define_insn "sibcall_internal" - [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, i")) + [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S")) (match_operand 1)) (clobber (reg:SI TA_REGNUM)) (return)])] @@ -1617,9 +1745,11 @@ (const_int 2) (const_int 4)) ;; Alternative 1 - (if_then_else (match_test "nds32_long_call_p (operands[0])") - (const_int 12) - (const_int 4)) + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "nds32_long_call_p (operands[0])") + (const_int 12) + (const_int 4))) ])] ) @@ -1633,11 +1763,22 @@ (const_int 0))) (clobber (reg:SI TA_REGNUM)) (return)])] - "") + "" +{ + rtx sym = XEXP (operands[1], 0); + + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[1] = gen_const_mem 
(Pmode, reg); + } +}) (define_insn "sibcall_value_internal" [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, i")) + (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S")) (match_operand 2))) (clobber (reg:SI TA_REGNUM)) (return)])] @@ -1668,9 +1809,11 @@ (const_int 2) (const_int 4)) ;; Alternative 1 - (if_then_else (match_test "nds32_long_call_p (operands[1])") - (const_int 12) - (const_int 4)) + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "nds32_long_call_p (operands[1])") + (const_int 12) + (const_int 4))) ])] ) @@ -1687,12 +1830,33 @@ nds32_expand_prologue_v3push (); else nds32_expand_prologue (); + + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM))); + DONE; }) (define_expand "epilogue" [(const_int 0)] "" { + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM))); + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. In addition, we need to check if v3push is indeed available. 
*/ if (NDS32_V3PUSH_AVAILABLE_P) @@ -1792,7 +1956,8 @@ "nds32_can_use_return_insn ()" { /* Emit as the simple return. */ - if (cfun->machine->naked_p + if (!cfun->machine->fp_as_gp_p + && cfun->machine->naked_p && (cfun->machine->va_args_size == 0)) { emit_jump_insn (gen_return_internal ()); @@ -1802,9 +1967,14 @@ ;; This pattern is expanded only by the shrink-wrapping optimization ;; on paths where the function prologue has not been executed. +;; However, such optimization may reorder the prologue/epilogue blocks +;; together with basic blocks within function body. +;; So we must disable this pattern if we have already decided +;; to perform fp_as_gp optimization, which requires prologue to be +;; first block and epilogue to be last block. (define_expand "simple_return" [(simple_return)] - "" + "!cfun->machine->fp_as_gp_p" "" ) @@ -1823,6 +1993,9 @@ [(simple_return)] "" { + if (nds32_isr_function_critical_p (current_function_decl)) + return "iret"; + if (TARGET_16_BIT) return "ret5"; else @@ -1831,9 +2004,11 @@ [(set_attr "type" "branch") (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)))]) + (if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)") + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4))))]) ;; ---------------------------------------------------------------------------- @@ -1868,6 +2043,7 @@ { rtx add_tmp; rtx reg, test; + rtx tmp_reg; /* Step A: "k <-- (plus (operands[0]) (-operands[1]))". */ if (operands[1] != const0_rtx) @@ -1889,9 +2065,14 @@ emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4])); - /* Step C, D, E, and F, using another temporary register. */ - rtx tmp = gen_reg_rtx (SImode); - emit_jump_insn (gen_casesi_internal (operands[0], operands[3], tmp)); + tmp_reg = gen_reg_rtx (SImode); + /* Step C, D, E, and F, using another temporary register tmp_reg. 
*/ + if (flag_pic) + emit_use (pic_offset_table_rtx); + + emit_jump_insn (gen_casesi_internal (operands[0], + operands[3], + tmp_reg)); DONE; }) @@ -1927,13 +2108,30 @@ else return nds32_output_casesi (operands); } - [(set_attr "length" "20") - (set_attr "type" "branch")]) + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 28) + (const_int 20)))]) ;; ---------------------------------------------------------------------------- ;; Performance Extension +; If -fwrapv option is issued, GCC expects there will be +; signed overflow situation. So the ABS(INT_MIN) is still INT_MIN +; (e.g. ABS(0x80000000)=0x80000000). +; However, the hardware ABS instruction of nds32 target +; always performs saturation: abs 0x80000000 -> 0x7fffffff. +; So that we can only enable abssi2 pattern if flag_wrapv is NOT presented. +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" " r")))] + "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv" + "abs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + (define_insn "clzsi2" [(set (match_operand:SI 0 "register_operand" "=r") (clz:SI (match_operand:SI 1 "register_operand" " r")))] @@ -1996,6 +2194,25 @@ [(set_attr "length" "0")] ) +;; Output .omit_fp_begin for fp-as-gp optimization. +;; Also we have to set $fp register. +(define_insn "omit_fp_begin" + [(set (match_operand:SI 0 "register_operand" "=x") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))] + "" + "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----" + [(set_attr "length" "8")] +) + +;; Output .omit_fp_end for fp-as-gp optimization. +;; Claim that we have to use $fp register. +(define_insn "omit_fp_end" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)] + "" + "! -----\;.omit_fp_end\;! 
-----" + [(set_attr "length" "0")] +) + (define_insn "pop25return" [(return) (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)] @@ -2004,6 +2221,36 @@ [(set_attr "length" "0")] ) +;; Add pc +(define_insn "add_pc" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (pc)))] + "TARGET_LINUX_ABI || flag_pic" + "add5.pc\t%0" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "r")))] + "" +{ + emit_insn (gen_unspec_wsbh (operands[0], operands[1])); + emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16))); + DONE; +}) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "wsbh\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + ;; ---------------------------------------------------------------------------- ;; Patterns for exception handling @@ -2068,3 +2315,57 @@ }) ;; ---------------------------------------------------------------------------- + +;; Patterns for TLS. +;; The following two tls patterns are not expanded directly because the +;; intermediate value may be spilled into the stack. As a result, it is +;; hard to analyze the define-use chain in the relax_opt pass. + + +;; There is a unspec operand to record RELAX_GROUP number because each +;; emitted instruction need a relax_hint above it. 
+(define_insn "tls_desc" + [(set (reg:SI 0) + (call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC) + (const_int 1))) + (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) + (use (reg:SI GP_REGNUM)) + (clobber (reg:SI LP_REGNUM)) + (clobber (reg:SI TA_REGNUM))] + "" + { + return nds32_output_tls_desc (operands); + } + [(set_attr "length" "20") + (set_attr "type" "branch")] +) + +;; There is a unspec operand to record RELAX_GROUP number because each +;; emitted instruction need a relax_hint above it. +(define_insn "tls_ie" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE)) + (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)) + (use (reg:SI GP_REGNUM))] + "" + { + return nds32_output_tls_ie (operands); + } + [(set (attr "length") (if_then_else (match_test "flag_pic") + (const_int 12) + (const_int 8))) + (set_attr "type" "misc")] +) + +;; The pattern is for some relaxation groups that have to keep addsi3 in 32-bit mode. +(define_insn "addsi3_32bit" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "%r") + (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))] + "" + "add\t%0, %1, %2"; + [(set_attr "type" "alu") + (set_attr "length" "4") + (set_attr "feature" "v1")]) + +;; ---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index dcf6d39..0e50c99 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -32,6 +32,13 @@ EL Target RejectNegative Alias(mlittle-endian) Generate code in little-endian mode. +mfp-as-gp +Target RejectNegative Alias(mforce-fp-as-gp) +Force performing fp-as-gp optimization. + +mno-fp-as-gp +Target RejectNegative Alias(mforbid-fp-as-gp) +Forbid performing fp-as-gp optimization. 
; --------------------------------------------------------------- @@ -85,11 +92,36 @@ mlittle-endian Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) Generate code in little-endian mode. +mforce-fp-as-gp +Target Undocumented Mask(FORCE_FP_AS_GP) +Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. + +mforbid-fp-as-gp +Target Undocumented Mask(FORBID_FP_AS_GP) +Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. + +mict-model= +Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL) +Specify the address generation strategy for ICT call's code model. + +Enum +Name(nds32_ict_model_type) Type(enum nds32_ict_model_type) +Known cmodel types (for use with the -mict-model= option): + +EnumValue +Enum(nds32_ict_model_type) String(small) Value(ICT_MODEL_SMALL) + +EnumValue +Enum(nds32_ict_model_type) String(large) Value(ICT_MODEL_LARGE) mcmov Target Report Mask(CMOV) Generate conditional move instructions. +mhw-abs +Target Report Mask(HW_ABS) +Generate hardware abs instructions. + mext-perf Target Report Mask(EXT_PERF) Generate performance extension instructions. @@ -102,6 +134,10 @@ mext-string Target Report Mask(EXT_STRING) Generate string extension instructions. +mext-dsp +Target Report Mask(EXT_DSP) +Generate DSP extension instructions. + mv3push Target Report Mask(V3PUSH) Generate v3 push25/pop25 instructions. @@ -115,13 +151,17 @@ Target Report Mask(RELAX_HINT) Insert relax hint for linker to do relaxation. mvh -Target Report Mask(VH) +Target Report Mask(VH) Condition(!TARGET_LINUX_ABI) Enable Virtual Hosting support. misr-vector-size= Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) Specify the size of each interrupt vector, which must be 4 or 16. 
+misr-secure= +Target RejectNegative Joined UInteger Var(nds32_isr_secure_level) Init(0) +Specify the security level of c-isr for the whole file. + mcache-block-size= Target RejectNegative Joined UInteger Var(nds32_cache_block_size) Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE) Specify the size of each cache block, which must be a power of 2 between 4 and 512. @@ -141,6 +181,9 @@ EnumValue Enum(nds32_arch_type) String(v3) Value(ARCH_V3) EnumValue +Enum(nds32_arch_type) String(v3j) Value(ARCH_V3J) + +EnumValue Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M) EnumValue @@ -149,23 +192,6 @@ Enum(nds32_arch_type) String(v3f) Value(ARCH_V3F) EnumValue Enum(nds32_arch_type) String(v3s) Value(ARCH_V3S) -mcmodel= -Target RejectNegative Joined Enum(nds32_cmodel_type) Var(nds32_cmodel_option) Init(CMODEL_LARGE) -Specify the address generation strategy for code model. - -Enum -Name(nds32_cmodel_type) Type(enum nds32_cmodel_type) -Known cmodel types (for use with the -mcmodel= option): - -EnumValue -Enum(nds32_cmodel_type) String(small) Value(CMODEL_SMALL) - -EnumValue -Enum(nds32_cmodel_type) String(medium) Value(CMODEL_MEDIUM) - -EnumValue -Enum(nds32_cmodel_type) String(large) Value(CMODEL_LARGE) - mcpu= Target RejectNegative Joined Enum(nds32_cpu_type) Var(nds32_cpu_option) Init(CPU_N9) Specify the cpu for pipeline model. 
@@ -235,6 +261,99 @@ EnumValue Enum(nds32_cpu_type) String(n968a) Value(CPU_N9) EnumValue +Enum(nds32_cpu_type) String(n10) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d10) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10) + +EnumValue +Enum(nds32_cpu_type) Undocumented String(graywolf) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(n15) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(d15) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(n15s) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(d15s) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(n15f) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(d15f) Value(CPU_GRAYWOLF) + +EnumValue +Enum(nds32_cpu_type) String(n12) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1213) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233-fpu) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n1233-spu) Value(CPU_N12) + +EnumValue +Enum(nds32_cpu_type) String(n13) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337) 
Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337-fpu) Value(CPU_N13) + +EnumValue +Enum(nds32_cpu_type) String(n1337-spu) Value(CPU_N13) + +EnumValue Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE) mconfig-fpu= @@ -321,6 +440,18 @@ mext-fpu-dp Target Report Mask(FPU_DOUBLE) Generate double-precision floating-point instructions. +mforce-no-ext-dsp +Target Undocumented Report Mask(FORCE_NO_EXT_DSP) +Force disable hardware loop, even use -mext-dsp. + +msched-prolog-epilog +Target Var(flag_sched_prolog_epilog) Init(0) +Permit scheduling of a function's prologue and epilogue sequence. + +mret-in-naked-func +Target Var(flag_ret_in_naked_func) Init(1) +Generate return instruction in naked function. + malways-save-lp Target Var(flag_always_save_lp) Init(0) Always save $lp in the stack. @@ -328,3 +459,7 @@ Always save $lp in the stack. munaligned-access Target Report Var(flag_unaligned_access) Init(0) Enable unaligned word and halfword accesses to packed data. + +minline-asm-r15 +Target Report Var(flag_inline_asm_r15) Init(0) +Allow use r15 for inline ASM. diff --git a/gcc/config/nds32/nds32_init.inc b/gcc/config/nds32/nds32_init.inc new file mode 100644 index 0000000..1084ad0 --- /dev/null +++ b/gcc/config/nds32/nds32_init.inc @@ -0,0 +1,43 @@ +/* + * nds32_init.inc + * + * NDS32 architecture startup assembler header file + * + */ + +.macro nds32_init + + ! Initialize GP for data access + la $gp, _SDA_BASE_ + +#if defined(__NDS32_EXT_EX9__) + ! Check HW for EX9 + mfsr $r0, $MSC_CFG + li $r1, (1 << 24) + and $r2, $r0, $r1 + beqz $r2, 1f + + ! Initialize the table base of EX9 instruction + la $r0, _ITB_BASE_ + mtusr $r0, $ITB +1: +#endif + +#if defined(__NDS32_EXT_FPU_DP__) || defined(__NDS32_EXT_FPU_SP__) + ! Enable FPU + mfsr $r0, $FUCOP_CTL + ori $r0, $r0, #0x1 + mtsr $r0, $FUCOP_CTL + dsb + + ! Enable denormalized flush-to-Zero mode + fmfcsr $r0 + ori $r0,$r0,#0x1000 + fmtcsr $r0 + dsb +#endif + + ! 
Initialize default stack pointer + la $sp, _stack + +.endm diff --git a/gcc/config/nds32/nds32_intrinsic.h b/gcc/config/nds32/nds32_intrinsic.h index 7bb1177..24cb291 100644 --- a/gcc/config/nds32/nds32_intrinsic.h +++ b/gcc/config/nds32/nds32_intrinsic.h @@ -26,6 +26,13 @@ #ifndef _NDS32_INTRINSIC_H #define _NDS32_INTRINSIC_H +typedef signed char int8x4_t __attribute ((vector_size(4))); +typedef short int16x2_t __attribute ((vector_size(4))); +typedef int int32x2_t __attribute__((vector_size(8))); +typedef unsigned char uint8x4_t __attribute__ ((vector_size (4))); +typedef unsigned short uint16x2_t __attribute__ ((vector_size (4))); +typedef unsigned int uint32x2_t __attribute__((vector_size(8))); + /* General instrinsic register names. */ enum nds32_intrinsic_registers { @@ -691,6 +698,55 @@ enum nds32_dpref #define __nds32__tlbop_flua() \ (__builtin_nds32_tlbop_flua()) +#define __nds32__kaddw(a, b) \ + (__builtin_nds32_kaddw ((a), (b))) +#define __nds32__kaddh(a, b) \ + (__builtin_nds32_kaddh ((a), (b))) +#define __nds32__ksubw(a, b) \ + (__builtin_nds32_ksubw ((a), (b))) +#define __nds32__ksubh(a, b) \ + (__builtin_nds32_ksubh ((a), (b))) +#define __nds32__kdmbb(a, b) \ + (__builtin_nds32_kdmbb ((a), (b))) +#define __nds32__v_kdmbb(a, b) \ + (__builtin_nds32_v_kdmbb ((a), (b))) +#define __nds32__kdmbt(a, b) \ + (__builtin_nds32_kdmbt ((a), (b))) +#define __nds32__v_kdmbt(a, b) \ + (__builtin_nds32_v_kdmbt ((a), (b))) +#define __nds32__kdmtb(a, b) \ + (__builtin_nds32_kdmtb ((a), (b))) +#define __nds32__v_kdmtb(a, b) \ + (__builtin_nds32_v_kdmtb ((a), (b))) +#define __nds32__kdmtt(a, b) \ + (__builtin_nds32_kdmtt ((a), (b))) +#define __nds32__v_kdmtt(a, b) \ + (__builtin_nds32_v_kdmtt ((a), (b))) +#define __nds32__khmbb(a, b) \ + (__builtin_nds32_khmbb ((a), (b))) +#define __nds32__v_khmbb(a, b) \ + (__builtin_nds32_v_khmbb ((a), (b))) +#define __nds32__khmbt(a, b) \ + (__builtin_nds32_khmbt ((a), (b))) +#define __nds32__v_khmbt(a, b) \ + 
(__builtin_nds32_v_khmbt ((a), (b))) +#define __nds32__khmtb(a, b) \ + (__builtin_nds32_khmtb ((a), (b))) +#define __nds32__v_khmtb(a, b) \ + (__builtin_nds32_v_khmtb ((a), (b))) +#define __nds32__khmtt(a, b) \ + (__builtin_nds32_khmtt ((a), (b))) +#define __nds32__v_khmtt(a, b) \ + (__builtin_nds32_v_khmtt ((a), (b))) +#define __nds32__kslraw(a, b) \ + (__builtin_nds32_kslraw ((a), (b))) +#define __nds32__kslraw_u(a, b) \ + (__builtin_nds32_kslraw_u ((a), (b))) + +#define __nds32__rdov() \ + (__builtin_nds32_rdov()) +#define __nds32__clrov() \ + (__builtin_nds32_clrov()) #define __nds32__gie_dis() \ (__builtin_nds32_gie_dis()) #define __nds32__gie_en() \ @@ -720,10 +776,622 @@ enum nds32_dpref #define __nds32__get_trig_type(a) \ (__builtin_nds32_get_trig_type ((a))) +#define __nds32__get_unaligned_hw(a) \ + (__builtin_nds32_unaligned_load_hw ((a))) +#define __nds32__get_unaligned_w(a) \ + (__builtin_nds32_unaligned_load_w ((a))) +#define __nds32__get_unaligned_dw(a) \ + (__builtin_nds32_unaligned_load_dw ((a))) +#define __nds32__put_unaligned_hw(a, data) \ + (__builtin_nds32_unaligned_store_hw ((a), (data))) +#define __nds32__put_unaligned_w(a, data) \ + (__builtin_nds32_unaligned_store_w ((a), (data))) +#define __nds32__put_unaligned_dw(a, data) \ + (__builtin_nds32_unaligned_store_dw ((a), (data))) + +#define __nds32__add16(a, b) \ + (__builtin_nds32_add16 ((a), (b))) +#define __nds32__v_uadd16(a, b) \ + (__builtin_nds32_v_uadd16 ((a), (b))) +#define __nds32__v_sadd16(a, b) \ + (__builtin_nds32_v_sadd16 ((a), (b))) +#define __nds32__radd16(a, b) \ + (__builtin_nds32_radd16 ((a), (b))) +#define __nds32__v_radd16(a, b) \ + (__builtin_nds32_v_radd16 ((a), (b))) +#define __nds32__uradd16(a, b) \ + (__builtin_nds32_uradd16 ((a), (b))) +#define __nds32__v_uradd16(a, b) \ + (__builtin_nds32_v_uradd16 ((a), (b))) +#define __nds32__kadd16(a, b) \ + (__builtin_nds32_kadd16 ((a), (b))) +#define __nds32__v_kadd16(a, b) \ + (__builtin_nds32_v_kadd16 ((a), (b))) +#define 
__nds32__ukadd16(a, b) \ + (__builtin_nds32_ukadd16 ((a), (b))) +#define __nds32__v_ukadd16(a, b) \ + (__builtin_nds32_v_ukadd16 ((a), (b))) +#define __nds32__sub16(a, b) \ + (__builtin_nds32_sub16 ((a), (b))) +#define __nds32__v_usub16(a, b) \ + (__builtin_nds32_v_usub16 ((a), (b))) +#define __nds32__v_ssub16(a, b) \ + (__builtin_nds32_v_ssub16 ((a), (b))) +#define __nds32__rsub16(a, b) \ + (__builtin_nds32_rsub16 ((a), (b))) +#define __nds32__v_rsub16(a, b) \ + (__builtin_nds32_v_rsub16 ((a), (b))) +#define __nds32__ursub16(a, b) \ + (__builtin_nds32_ursub16 ((a), (b))) +#define __nds32__v_ursub16(a, b) \ + (__builtin_nds32_v_ursub16 ((a), (b))) +#define __nds32__ksub16(a, b) \ + (__builtin_nds32_ksub16 ((a), (b))) +#define __nds32__v_ksub16(a, b) \ + (__builtin_nds32_v_ksub16 ((a), (b))) +#define __nds32__uksub16(a, b) \ + (__builtin_nds32_uksub16 ((a), (b))) +#define __nds32__v_uksub16(a, b) \ + (__builtin_nds32_v_uksub16 ((a), (b))) +#define __nds32__cras16(a, b) \ + (__builtin_nds32_cras16 ((a), (b))) +#define __nds32__v_ucras16(a, b) \ + (__builtin_nds32_v_ucras16 ((a), (b))) +#define __nds32__v_scras16(a, b) \ + (__builtin_nds32_v_scras16 ((a), (b))) +#define __nds32__rcras16(a, b) \ + (__builtin_nds32_rcras16 ((a), (b))) +#define __nds32__v_rcras16(a, b) \ + (__builtin_nds32_v_rcras16 ((a), (b))) +#define __nds32__urcras16(a, b) \ + (__builtin_nds32_urcras16 ((a), (b))) +#define __nds32__v_urcras16(a, b) \ + (__builtin_nds32_v_urcras16 ((a), (b))) +#define __nds32__kcras16(a, b) \ + (__builtin_nds32_kcras16 ((a), (b))) +#define __nds32__v_kcras16(a, b) \ + (__builtin_nds32_v_kcras16 ((a), (b))) +#define __nds32__ukcras16(a, b) \ + (__builtin_nds32_ukcras16 ((a), (b))) +#define __nds32__v_ukcras16(a, b) \ + (__builtin_nds32_v_ukcras16 ((a), (b))) +#define __nds32__crsa16(a, b) \ + (__builtin_nds32_crsa16 ((a), (b))) +#define __nds32__v_ucrsa16(a, b) \ + (__builtin_nds32_v_ucrsa16 ((a), (b))) +#define __nds32__v_scrsa16(a, b) \ + (__builtin_nds32_v_scrsa16 
((a), (b))) +#define __nds32__rcrsa16(a, b) \ + (__builtin_nds32_rcrsa16 ((a), (b))) +#define __nds32__v_rcrsa16(a, b) \ + (__builtin_nds32_v_rcrsa16 ((a), (b))) +#define __nds32__urcrsa16(a, b) \ + (__builtin_nds32_urcrsa16 ((a), (b))) +#define __nds32__v_urcrsa16(a, b) \ + (__builtin_nds32_v_urcrsa16 ((a), (b))) +#define __nds32__kcrsa16(a, b) \ + (__builtin_nds32_kcrsa16 ((a), (b))) +#define __nds32__v_kcrsa16(a, b) \ + (__builtin_nds32_v_kcrsa16 ((a), (b))) +#define __nds32__ukcrsa16(a, b) \ + (__builtin_nds32_ukcrsa16 ((a), (b))) +#define __nds32__v_ukcrsa16(a, b) \ + (__builtin_nds32_v_ukcrsa16 ((a), (b))) + +#define __nds32__add8(a, b) \ + (__builtin_nds32_add8 ((a), (b))) +#define __nds32__v_uadd8(a, b) \ + (__builtin_nds32_v_uadd8 ((a), (b))) +#define __nds32__v_sadd8(a, b) \ + (__builtin_nds32_v_sadd8 ((a), (b))) +#define __nds32__radd8(a, b) \ + (__builtin_nds32_radd8 ((a), (b))) +#define __nds32__v_radd8(a, b) \ + (__builtin_nds32_v_radd8 ((a), (b))) +#define __nds32__uradd8(a, b) \ + (__builtin_nds32_uradd8 ((a), (b))) +#define __nds32__v_uradd8(a, b) \ + (__builtin_nds32_v_uradd8 ((a), (b))) +#define __nds32__kadd8(a, b) \ + (__builtin_nds32_kadd8 ((a), (b))) +#define __nds32__v_kadd8(a, b) \ + (__builtin_nds32_v_kadd8 ((a), (b))) +#define __nds32__ukadd8(a, b) \ + (__builtin_nds32_ukadd8 ((a), (b))) +#define __nds32__v_ukadd8(a, b) \ + (__builtin_nds32_v_ukadd8 ((a), (b))) +#define __nds32__sub8(a, b) \ + (__builtin_nds32_sub8 ((a), (b))) +#define __nds32__v_usub8(a, b) \ + (__builtin_nds32_v_usub8 ((a), (b))) +#define __nds32__v_ssub8(a, b) \ + (__builtin_nds32_v_ssub8 ((a), (b))) +#define __nds32__rsub8(a, b) \ + (__builtin_nds32_rsub8 ((a), (b))) +#define __nds32__v_rsub8(a, b) \ + (__builtin_nds32_v_rsub8 ((a), (b))) +#define __nds32__ursub8(a, b) \ + (__builtin_nds32_ursub8 ((a), (b))) +#define __nds32__v_ursub8(a, b) \ + (__builtin_nds32_v_ursub8 ((a), (b))) +#define __nds32__ksub8(a, b) \ + (__builtin_nds32_ksub8 ((a), (b))) +#define 
__nds32__v_ksub8(a, b) \ + (__builtin_nds32_v_ksub8 ((a), (b))) +#define __nds32__uksub8(a, b) \ + (__builtin_nds32_uksub8 ((a), (b))) +#define __nds32__v_uksub8(a, b) \ + (__builtin_nds32_v_uksub8 ((a), (b))) + +#define __nds32__sra16(a, b) \ + (__builtin_nds32_sra16 ((a), (b))) +#define __nds32__v_sra16(a, b) \ + (__builtin_nds32_v_sra16 ((a), (b))) +#define __nds32__sra16_u(a, b) \ + (__builtin_nds32_sra16_u ((a), (b))) +#define __nds32__v_sra16_u(a, b) \ + (__builtin_nds32_v_sra16_u ((a), (b))) +#define __nds32__srl16(a, b) \ + (__builtin_nds32_srl16 ((a), (b))) +#define __nds32__v_srl16(a, b) \ + (__builtin_nds32_v_srl16 ((a), (b))) +#define __nds32__srl16_u(a, b) \ + (__builtin_nds32_srl16_u ((a), (b))) +#define __nds32__v_srl16_u(a, b) \ + (__builtin_nds32_v_srl16_u ((a), (b))) +#define __nds32__sll16(a, b) \ + (__builtin_nds32_sll16 ((a), (b))) +#define __nds32__v_sll16(a, b) \ + (__builtin_nds32_v_sll16 ((a), (b))) +#define __nds32__ksll16(a, b) \ + (__builtin_nds32_ksll16 ((a), (b))) +#define __nds32__v_ksll16(a, b) \ + (__builtin_nds32_v_ksll16 ((a), (b))) +#define __nds32__kslra16(a, b) \ + (__builtin_nds32_kslra16 ((a), (b))) +#define __nds32__v_kslra16(a, b) \ + (__builtin_nds32_v_kslra16 ((a), (b))) +#define __nds32__kslra16_u(a, b) \ + (__builtin_nds32_kslra16_u ((a), (b))) +#define __nds32__v_kslra16_u(a, b) \ + (__builtin_nds32_v_kslra16_u ((a), (b))) + +#define __nds32__cmpeq16(a, b) \ + (__builtin_nds32_cmpeq16 ((a), (b))) +#define __nds32__v_scmpeq16(a, b) \ + (__builtin_nds32_v_scmpeq16 ((a), (b))) +#define __nds32__v_ucmpeq16(a, b) \ + (__builtin_nds32_v_ucmpeq16 ((a), (b))) +#define __nds32__scmplt16(a, b) \ + (__builtin_nds32_scmplt16 ((a), (b))) +#define __nds32__v_scmplt16(a, b) \ + (__builtin_nds32_v_scmplt16 ((a), (b))) +#define __nds32__scmple16(a, b) \ + (__builtin_nds32_scmple16 ((a), (b))) +#define __nds32__v_scmple16(a, b) \ + (__builtin_nds32_v_scmple16 ((a), (b))) +#define __nds32__ucmplt16(a, b) \ + (__builtin_nds32_ucmplt16 
((a), (b))) +#define __nds32__v_ucmplt16(a, b) \ + (__builtin_nds32_v_ucmplt16 ((a), (b))) +#define __nds32__ucmple16(a, b) \ + (__builtin_nds32_ucmple16 ((a), (b))) +#define __nds32__v_ucmple16(a, b) \ + (__builtin_nds32_v_ucmple16 ((a), (b))) + +#define __nds32__cmpeq8(a, b) \ + (__builtin_nds32_cmpeq8 ((a), (b))) +#define __nds32__v_scmpeq8(a, b) \ + (__builtin_nds32_v_scmpeq8 ((a), (b))) +#define __nds32__v_ucmpeq8(a, b) \ + (__builtin_nds32_v_ucmpeq8 ((a), (b))) +#define __nds32__scmplt8(a, b) \ + (__builtin_nds32_scmplt8 ((a), (b))) +#define __nds32__v_scmplt8(a, b) \ + (__builtin_nds32_v_scmplt8 ((a), (b))) +#define __nds32__scmple8(a, b) \ + (__builtin_nds32_scmple8 ((a), (b))) +#define __nds32__v_scmple8(a, b) \ + (__builtin_nds32_v_scmple8 ((a), (b))) +#define __nds32__ucmplt8(a, b) \ + (__builtin_nds32_ucmplt8 ((a), (b))) +#define __nds32__v_ucmplt8(a, b) \ + (__builtin_nds32_v_ucmplt8 ((a), (b))) +#define __nds32__ucmple8(a, b) \ + (__builtin_nds32_ucmple8 ((a), (b))) +#define __nds32__v_ucmple8(a, b) \ + (__builtin_nds32_v_ucmple8 ((a), (b))) + +#define __nds32__smin16(a, b) \ + (__builtin_nds32_smin16 ((a), (b))) +#define __nds32__v_smin16(a, b) \ + (__builtin_nds32_v_smin16 ((a), (b))) +#define __nds32__umin16(a, b) \ + (__builtin_nds32_umin16 ((a), (b))) +#define __nds32__v_umin16(a, b) \ + (__builtin_nds32_v_umin16 ((a), (b))) +#define __nds32__smax16(a, b) \ + (__builtin_nds32_smax16 ((a), (b))) +#define __nds32__v_smax16(a, b) \ + (__builtin_nds32_v_smax16 ((a), (b))) +#define __nds32__umax16(a, b) \ + (__builtin_nds32_umax16 ((a), (b))) +#define __nds32__v_umax16(a, b) \ + (__builtin_nds32_v_umax16 ((a), (b))) +#define __nds32__sclip16(a, b) \ + (__builtin_nds32_sclip16 ((a), (b))) +#define __nds32__v_sclip16(a, b) \ + (__builtin_nds32_v_sclip16 ((a), (b))) +#define __nds32__uclip16(a, b) \ + (__builtin_nds32_uclip16 ((a), (b))) +#define __nds32__v_uclip16(a, b) \ + (__builtin_nds32_v_uclip16 ((a), (b))) +#define __nds32__khm16(a, b) \ + 
(__builtin_nds32_khm16 ((a), (b))) +#define __nds32__v_khm16(a, b) \ + (__builtin_nds32_v_khm16 ((a), (b))) +#define __nds32__khmx16(a, b) \ + (__builtin_nds32_khmx16 ((a), (b))) +#define __nds32__v_khmx16(a, b) \ + (__builtin_nds32_v_khmx16 ((a), (b))) +#define __nds32__kabs16(a) \ + (__builtin_nds32_kabs16 ((a))) +#define __nds32__v_kabs16(a) \ + (__builtin_nds32_v_kabs16 ((a))) + +#define __nds32__smin8(a, b) \ + (__builtin_nds32_smin8 ((a), (b))) +#define __nds32__v_smin8(a, b) \ + (__builtin_nds32_v_smin8 ((a), (b))) +#define __nds32__umin8(a, b) \ + (__builtin_nds32_umin8 ((a), (b))) +#define __nds32__v_umin8(a, b) \ + (__builtin_nds32_v_umin8 ((a), (b))) +#define __nds32__smax8(a, b) \ + (__builtin_nds32_smax8 ((a), (b))) +#define __nds32__v_smax8(a, b) \ + (__builtin_nds32_v_smax8 ((a), (b))) +#define __nds32__umax8(a, b) \ + (__builtin_nds32_umax8 ((a), (b))) +#define __nds32__v_umax8(a, b) \ + (__builtin_nds32_v_umax8 ((a), (b))) +#define __nds32__kabs8(a) \ + (__builtin_nds32_kabs8 ((a))) +#define __nds32__v_kabs8(a) \ + (__builtin_nds32_v_kabs8 ((a))) + +#define __nds32__sunpkd810(a) \ + (__builtin_nds32_sunpkd810 ((a))) +#define __nds32__v_sunpkd810(a) \ + (__builtin_nds32_v_sunpkd810 ((a))) +#define __nds32__sunpkd820(a) \ + (__builtin_nds32_sunpkd820 ((a))) +#define __nds32__v_sunpkd820(a) \ + (__builtin_nds32_v_sunpkd820 ((a))) +#define __nds32__sunpkd830(a) \ + (__builtin_nds32_sunpkd830 ((a))) +#define __nds32__v_sunpkd830(a) \ + (__builtin_nds32_v_sunpkd830 ((a))) +#define __nds32__sunpkd831(a) \ + (__builtin_nds32_sunpkd831 ((a))) +#define __nds32__v_sunpkd831(a) \ + (__builtin_nds32_v_sunpkd831 ((a))) +#define __nds32__zunpkd810(a) \ + (__builtin_nds32_zunpkd810 ((a))) +#define __nds32__v_zunpkd810(a) \ + (__builtin_nds32_v_zunpkd810 ((a))) +#define __nds32__zunpkd820(a) \ + (__builtin_nds32_zunpkd820 ((a))) +#define __nds32__v_zunpkd820(a) \ + (__builtin_nds32_v_zunpkd820 ((a))) +#define __nds32__zunpkd830(a) \ + (__builtin_nds32_zunpkd830 
((a))) +#define __nds32__v_zunpkd830(a) \ + (__builtin_nds32_v_zunpkd830 ((a))) +#define __nds32__zunpkd831(a) \ + (__builtin_nds32_zunpkd831 ((a))) +#define __nds32__v_zunpkd831(a) \ + (__builtin_nds32_v_zunpkd831 ((a))) + +#define __nds32__raddw(a, b) \ + (__builtin_nds32_raddw ((a), (b))) +#define __nds32__uraddw(a, b) \ + (__builtin_nds32_uraddw ((a), (b))) +#define __nds32__rsubw(a, b) \ + (__builtin_nds32_rsubw ((a), (b))) +#define __nds32__ursubw(a, b) \ + (__builtin_nds32_ursubw ((a), (b))) + +#define __nds32__sra_u(a, b) \ + (__builtin_nds32_sra_u ((a), (b))) +#define __nds32__ksll(a, b) \ + (__builtin_nds32_ksll ((a), (b))) +#define __nds32__pkbb16(a, b) \ + (__builtin_nds32_pkbb16 ((a), (b))) +#define __nds32__v_pkbb16(a, b) \ + (__builtin_nds32_v_pkbb16 ((a), (b))) +#define __nds32__pkbt16(a, b) \ + (__builtin_nds32_pkbt16 ((a), (b))) +#define __nds32__v_pkbt16(a, b) \ + (__builtin_nds32_v_pkbt16 ((a), (b))) +#define __nds32__pktb16(a, b) \ + (__builtin_nds32_pktb16 ((a), (b))) +#define __nds32__v_pktb16(a, b) \ + (__builtin_nds32_v_pktb16 ((a), (b))) +#define __nds32__pktt16(a, b) \ + (__builtin_nds32_pktt16 ((a), (b))) +#define __nds32__v_pktt16(a, b) \ + (__builtin_nds32_v_pktt16 ((a), (b))) + +#define __nds32__smmul(a, b) \ + (__builtin_nds32_smmul ((a), (b))) +#define __nds32__smmul_u(a, b) \ + (__builtin_nds32_smmul_u ((a), (b))) +#define __nds32__kmmac(r, a, b) \ + (__builtin_nds32_kmmac ((r), (a), (b))) +#define __nds32__kmmac_u(r, a, b) \ + (__builtin_nds32_kmmac_u ((r), (a), (b))) +#define __nds32__kmmsb(r, a, b) \ + (__builtin_nds32_kmmsb ((r), (a), (b))) +#define __nds32__kmmsb_u(r, a, b) \ + (__builtin_nds32_kmmsb_u ((r), (a), (b))) +#define __nds32__kwmmul(a, b) \ + (__builtin_nds32_kwmmul ((a), (b))) +#define __nds32__kwmmul_u(a, b) \ + (__builtin_nds32_kwmmul_u ((a), (b))) + +#define __nds32__smmwb(a, b) \ + (__builtin_nds32_smmwb ((a), (b))) +#define __nds32__v_smmwb(a, b) \ + (__builtin_nds32_v_smmwb ((a), (b))) +#define 
__nds32__smmwb_u(a, b) \ + (__builtin_nds32_smmwb_u ((a), (b))) +#define __nds32__v_smmwb_u(a, b) \ + (__builtin_nds32_v_smmwb_u ((a), (b))) +#define __nds32__smmwt(a, b) \ + (__builtin_nds32_smmwt ((a), (b))) +#define __nds32__v_smmwt(a, b) \ + (__builtin_nds32_v_smmwt ((a), (b))) +#define __nds32__smmwt_u(a, b) \ + (__builtin_nds32_smmwt_u ((a), (b))) +#define __nds32__v_smmwt_u(a, b) \ + (__builtin_nds32_v_smmwt_u ((a), (b))) +#define __nds32__kmmawb(r, a, b) \ + (__builtin_nds32_kmmawb ((r), (a), (b))) +#define __nds32__v_kmmawb(r, a, b) \ + (__builtin_nds32_v_kmmawb ((r), (a), (b))) +#define __nds32__kmmawb_u(r, a, b) \ + (__builtin_nds32_kmmawb_u ((r), (a), (b))) +#define __nds32__v_kmmawb_u(r, a, b) \ + (__builtin_nds32_v_kmmawb_u ((r), (a), (b))) +#define __nds32__kmmawt(r, a, b) \ + (__builtin_nds32_kmmawt ((r), (a), (b))) +#define __nds32__v_kmmawt(r, a, b) \ + (__builtin_nds32_v_kmmawt ((r), (a), (b))) +#define __nds32__kmmawt_u(r, a, b) \ + (__builtin_nds32_kmmawt_u ((r), (a), (b))) +#define __nds32__v_kmmawt_u(r, a, b) \ + (__builtin_nds32_v_kmmawt_u ((r), (a), (b))) + +#define __nds32__smbb(a, b) \ + (__builtin_nds32_smbb ((a), (b))) +#define __nds32__v_smbb(a, b) \ + (__builtin_nds32_v_smbb ((a), (b))) +#define __nds32__smbt(a, b) \ + (__builtin_nds32_smbt ((a), (b))) +#define __nds32__v_smbt(a, b) \ + (__builtin_nds32_v_smbt ((a), (b))) +#define __nds32__smtt(a, b) \ + (__builtin_nds32_smtt ((a), (b))) +#define __nds32__v_smtt(a, b) \ + (__builtin_nds32_v_smtt ((a), (b))) +#define __nds32__kmda(a, b) \ + (__builtin_nds32_kmda ((a), (b))) +#define __nds32__v_kmda(a, b) \ + (__builtin_nds32_v_kmda ((a), (b))) +#define __nds32__kmxda(a, b) \ + (__builtin_nds32_kmxda ((a), (b))) +#define __nds32__v_kmxda(a, b) \ + (__builtin_nds32_v_kmxda ((a), (b))) +#define __nds32__smds(a, b) \ + (__builtin_nds32_smds ((a), (b))) +#define __nds32__v_smds(a, b) \ + (__builtin_nds32_v_smds ((a), (b))) +#define __nds32__smdrs(a, b) \ + (__builtin_nds32_smdrs ((a), (b))) 
+#define __nds32__v_smdrs(a, b) \ + (__builtin_nds32_v_smdrs ((a), (b))) +#define __nds32__smxds(a, b) \ + (__builtin_nds32_smxds ((a), (b))) +#define __nds32__v_smxds(a, b) \ + (__builtin_nds32_v_smxds ((a), (b))) +#define __nds32__kmabb(r, a, b) \ + (__builtin_nds32_kmabb ((r), (a), (b))) +#define __nds32__v_kmabb(r, a, b) \ + (__builtin_nds32_v_kmabb ((r), (a), (b))) +#define __nds32__kmabt(r, a, b) \ + (__builtin_nds32_kmabt ((r), (a), (b))) +#define __nds32__v_kmabt(r, a, b) \ + (__builtin_nds32_v_kmabt ((r), (a), (b))) +#define __nds32__kmatt(r, a, b) \ + (__builtin_nds32_kmatt ((r), (a), (b))) +#define __nds32__v_kmatt(r, a, b) \ + (__builtin_nds32_v_kmatt ((r), (a), (b))) +#define __nds32__kmada(r, a, b) \ + (__builtin_nds32_kmada ((r), (a), (b))) +#define __nds32__v_kmada(r, a, b) \ + (__builtin_nds32_v_kmada ((r), (a), (b))) +#define __nds32__kmaxda(r, a, b) \ + (__builtin_nds32_kmaxda ((r), (a), (b))) +#define __nds32__v_kmaxda(r, a, b) \ + (__builtin_nds32_v_kmaxda ((r), (a), (b))) +#define __nds32__kmads(r, a, b) \ + (__builtin_nds32_kmads ((r), (a), (b))) +#define __nds32__v_kmads(r, a, b) \ + (__builtin_nds32_v_kmads ((r), (a), (b))) +#define __nds32__kmadrs(r, a, b) \ + (__builtin_nds32_kmadrs ((r), (a), (b))) +#define __nds32__v_kmadrs(r, a, b) \ + (__builtin_nds32_v_kmadrs ((r), (a), (b))) +#define __nds32__kmaxds(r, a, b) \ + (__builtin_nds32_kmaxds ((r), (a), (b))) +#define __nds32__v_kmaxds(r, a, b) \ + (__builtin_nds32_v_kmaxds ((r), (a), (b))) +#define __nds32__kmsda(r, a, b) \ + (__builtin_nds32_kmsda ((r), (a), (b))) +#define __nds32__v_kmsda(r, a, b) \ + (__builtin_nds32_v_kmsda ((r), (a), (b))) +#define __nds32__kmsxda(r, a, b) \ + (__builtin_nds32_kmsxda ((r), (a), (b))) +#define __nds32__v_kmsxda(r, a, b) \ + (__builtin_nds32_v_kmsxda ((r), (a), (b))) + +#define __nds32__smal(a, b) \ + (__builtin_nds32_smal ((a), (b))) +#define __nds32__v_smal(a, b) \ + (__builtin_nds32_v_smal ((a), (b))) + +#define __nds32__bitrev(a, b) \ + 
(__builtin_nds32_bitrev ((a), (b))) +#define __nds32__wext(a, b) \ + (__builtin_nds32_wext ((a), (b))) +#define __nds32__bpick(r, a, b) \ + (__builtin_nds32_bpick ((r), (a), (b))) +#define __nds32__insb(r, a, b) \ + (__builtin_nds32_insb ((r), (a), (b))) + +#define __nds32__sadd64(a, b) \ + (__builtin_nds32_sadd64 ((a), (b))) +#define __nds32__uadd64(a, b) \ + (__builtin_nds32_uadd64 ((a), (b))) +#define __nds32__radd64(a, b) \ + (__builtin_nds32_radd64 ((a), (b))) +#define __nds32__uradd64(a, b) \ + (__builtin_nds32_uradd64 ((a), (b))) +#define __nds32__kadd64(a, b) \ + (__builtin_nds32_kadd64 ((a), (b))) +#define __nds32__ukadd64(a, b) \ + (__builtin_nds32_ukadd64 ((a), (b))) +#define __nds32__ssub64(a, b) \ + (__builtin_nds32_ssub64 ((a), (b))) +#define __nds32__usub64(a, b) \ + (__builtin_nds32_usub64 ((a), (b))) +#define __nds32__rsub64(a, b) \ + (__builtin_nds32_rsub64 ((a), (b))) +#define __nds32__ursub64(a, b) \ + (__builtin_nds32_ursub64 ((a), (b))) +#define __nds32__ksub64(a, b) \ + (__builtin_nds32_ksub64 ((a), (b))) +#define __nds32__uksub64(a, b) \ + (__builtin_nds32_uksub64 ((a), (b))) + +#define __nds32__smar64(r, a, b) \ + (__builtin_nds32_smar64 ((r), (a), (b))) +#define __nds32__smsr64(r, a, b) \ + (__builtin_nds32_smsr64 ((r), (a), (b))) +#define __nds32__umar64(r, a, b) \ + (__builtin_nds32_umar64 ((r), (a), (b))) +#define __nds32__umsr64(r, a, b) \ + (__builtin_nds32_umsr64 ((r), (a), (b))) +#define __nds32__kmar64(r, a, b) \ + (__builtin_nds32_kmar64 ((r), (a), (b))) +#define __nds32__kmsr64(r, a, b) \ + (__builtin_nds32_kmsr64 ((r), (a), (b))) +#define __nds32__ukmar64(r, a, b) \ + (__builtin_nds32_ukmar64 ((r), (a), (b))) +#define __nds32__ukmsr64(r, a, b) \ + (__builtin_nds32_ukmsr64 ((r), (a), (b))) + +#define __nds32__smalbb(r, a, b) \ + (__builtin_nds32_smalbb ((r), (a), (b))) +#define __nds32__v_smalbb(r, a, b) \ + (__builtin_nds32_v_smalbb ((r), (a), (b))) +#define __nds32__smalbt(r, a, b) \ + (__builtin_nds32_smalbt ((r), (a), (b))) 
+#define __nds32__v_smalbt(r, a, b) \ + (__builtin_nds32_v_smalbt ((r), (a), (b))) +#define __nds32__smaltt(r, a, b) \ + (__builtin_nds32_smaltt ((r), (a), (b))) +#define __nds32__v_smaltt(r, a, b) \ + (__builtin_nds32_v_smaltt ((r), (a), (b))) +#define __nds32__smalda(r, a, b) \ + (__builtin_nds32_smalda ((r), (a), (b))) +#define __nds32__v_smalda(r, a, b) \ + (__builtin_nds32_v_smalda ((r), (a), (b))) +#define __nds32__smalxda(r, a, b) \ + (__builtin_nds32_smalxda ((r), (a), (b))) +#define __nds32__v_smalxda(r, a, b) \ + (__builtin_nds32_v_smalxda ((r), (a), (b))) +#define __nds32__smalds(r, a, b) \ + (__builtin_nds32_smalds ((r), (a), (b))) +#define __nds32__v_smalds(r, a, b) \ + (__builtin_nds32_v_smalds ((r), (a), (b))) +#define __nds32__smaldrs(r, a, b) \ + (__builtin_nds32_smaldrs ((r), (a), (b))) +#define __nds32__v_smaldrs(r, a, b) \ + (__builtin_nds32_v_smaldrs ((r), (a), (b))) +#define __nds32__smalxds(r, a, b) \ + (__builtin_nds32_smalxds ((r), (a), (b))) +#define __nds32__v_smalxds(r, a, b) \ + (__builtin_nds32_v_smalxds ((r), (a), (b))) +#define __nds32__smslda(r, a, b) \ + (__builtin_nds32_smslda ((r), (a), (b))) +#define __nds32__v_smslda(r, a, b) \ + (__builtin_nds32_v_smslda ((r), (a), (b))) +#define __nds32__smslxda(r, a, b) \ + (__builtin_nds32_smslxda ((r), (a), (b))) +#define __nds32__v_smslxda(r, a, b) \ + (__builtin_nds32_v_smslxda ((r), (a), (b))) + +#define __nds32__smul16(a, b) \ + (__builtin_nds32_smul16 ((a), (b))) +#define __nds32__v_smul16(a, b) \ + (__builtin_nds32_v_smul16 ((a), (b))) +#define __nds32__smulx16(a, b) \ + (__builtin_nds32_smulx16 ((a), (b))) +#define __nds32__v_smulx16(a, b) \ + (__builtin_nds32_v_smulx16 ((a), (b))) +#define __nds32__umul16(a, b) \ + (__builtin_nds32_umul16 ((a), (b))) +#define __nds32__v_umul16(a, b) \ + (__builtin_nds32_v_umul16 ((a), (b))) +#define __nds32__umulx16(a, b) \ + (__builtin_nds32_umulx16 ((a), (b))) +#define __nds32__v_umulx16(a, b) \ + (__builtin_nds32_v_umulx16 ((a), (b))) + +#define 
__nds32__uclip32(a, imm) \ + (__builtin_nds32_uclip32 ((a), (imm))) +#define __nds32__sclip32(a, imm) \ + (__builtin_nds32_sclip32 ((a), (imm))) +#define __nds32__kabs(a) \ + (__builtin_nds32_kabs ((a))) + #define __nds32__unaligned_feature() \ (__builtin_nds32_unaligned_feature()) #define __nds32__enable_unaligned() \ (__builtin_nds32_enable_unaligned()) #define __nds32__disable_unaligned() \ (__builtin_nds32_disable_unaligned()) + +#define __nds32__get_unaligned_u16x2(a) \ + (__builtin_nds32_get_unaligned_u16x2 ((a))) +#define __nds32__get_unaligned_s16x2(a) \ + (__builtin_nds32_get_unaligned_s16x2 ((a))) +#define __nds32__get_unaligned_u8x4(a) \ + (__builtin_nds32_get_unaligned_u8x4 ((a))) +#define __nds32__get_unaligned_s8x4(a) \ + (__builtin_nds32_get_unaligned_s8x4 ((a))) + +#define __nds32__put_unaligned_u16x2(a, data) \ + (__builtin_nds32_put_unaligned_u16x2 ((a), (data))) +#define __nds32__put_unaligned_s16x2(a, data) \ + (__builtin_nds32_put_unaligned_s16x2 ((a), (data))) +#define __nds32__put_unaligned_u8x4(a, data) \ + (__builtin_nds32_put_unaligned_u8x4 ((a), (data))) +#define __nds32__put_unaligned_s8x4(a, data) \ + (__builtin_nds32_put_unaligned_s8x4 ((a), (data))) + +#define NDS32ATTR_SIGNATURE __attribute__((signature)) + #endif /* nds32_intrinsic.h */ diff --git a/gcc/config/nds32/nds32_isr.h b/gcc/config/nds32/nds32_isr.h new file mode 100644 index 0000000..8ea58f9 --- /dev/null +++ b/gcc/config/nds32/nds32_isr.h @@ -0,0 +1,526 @@ +/* Intrinsic definitions of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2018 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _NDS32_ISR_H +#define _NDS32_ISR_H + +/* Attribute of an interrupt or exception handler: + + NDS32_READY_NESTED: This handler is interruptible if the user re-enables the GIE bit. + NDS32_NESTED : This handler is interruptible. This is not suitable + for exception handlers. + NDS32_NOT_NESTED : This handler is NOT interruptible. Users have to do + some work if nesting is wanted. + NDS32_CRITICAL : This handler is a critical ISR, which means it is small + and efficient. */ +#define NDS32_READY_NESTED 0 +#define NDS32_NESTED 1 +#define NDS32_NOT_NESTED 2 +#define NDS32_CRITICAL 3 + +/* Attribute of an interrupt or exception handler: + + NDS32_SAVE_CALLER_REGS: Save partial registers. + NDS32_SAVE_ALL_REGS : Save all registers in a table. */ +#define NDS32_SAVE_CALLER_REGS 0 +#define NDS32_SAVE_ALL_REGS 1 + +/* There are two versions of the register table for interrupt and exception handlers, + one for 16-register CPUs, the other for 32-register CPUs. These structures are + used for context switching or system call handling. The address of this + data can be obtained from the input argument of the handler functions. + + For system call handling, r0 to r5 are used to pass arguments. If more + arguments are used they are put into the stack and its starting address is + in sp. 
Return value of system call can be put into r0 and r1 upon exit from + system call handler. System call ID is in a system register and it can be + fetched via intrinsic function. For more information please read ABI and + other related documents. + + For context switching, at least 2 values need to saved in kernel. One is + IPC and the other is the stack address of current task. Use intrinsic + function to get IPC and the input argument of the handler functions + 8 to + get stack address of current task. To do context switching, you replace + new_sp with the stack address of new task and replace IPC system register + with IPC of new task, then, just return from handler. The context switching + will happen. */ + +/* Register table for exception handler; 32-register version. */ +typedef struct +{ + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + int r8; + int r9; + int r10; + int r11; + int r12; + int r13; + int r14; + int r15; + int r16; + int r17; + int r18; + int r19; + int r20; + int r21; + int r22; + int r23; + int r24; + int r25; + int r26; + int r27; + int fp; + int gp; + int lp; + int sp; +} NDS32_GPR32; + +/* Register table for exception handler; 16-register version. */ +typedef struct +{ + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + int r8; + int r9; + int r10; + int r15; + int fp; + int gp; + int lp; + int sp; +} NDS32_GPR16; + + +/* Use NDS32_REG32_TAB or NDS32_REG16_TAB in your program to + access register table. 
*/ +typedef struct +{ + union + { + int reg_a[32] ; + NDS32_GPR32 reg_s ; + } u ; +} NDS32_REG32_TAB; + +typedef struct +{ + union + { + int reg_a[16] ; + NDS32_GPR16 reg_s ; + } u ; +} NDS32_REG16_TAB; + +typedef struct +{ + int d0lo; + int d0hi; + int d1lo; + int d1hi; +} NDS32_DX_TAB; + +typedef struct +{ +#ifdef __NDS32_EB__ + float fsr0; + float fsr1; + float fsr2; + float fsr3; + float fsr4; + float fsr5; + float fsr6; + float fsr7; +#else + float fsr1; + float fsr0; + float fsr3; + float fsr2; + float fsr5; + float fsr4; + float fsr7; + float fsr6; +#endif +} NDS32_FSR8; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; +} NDS32_DSR4; + +typedef struct +{ +#ifdef __NDS32_EB__ + float fsr0; + float fsr1; + float fsr2; + float fsr3; + float fsr4; + float fsr5; + float fsr6; + float fsr7; + float fsr8; + float fsr9; + float fsr10; + float fsr11; + float fsr12; + float fsr13; + float fsr14; + float fsr15; +#else + float fsr1; + float fsr0; + float fsr3; + float fsr2; + float fsr5; + float fsr4; + float fsr7; + float fsr6; + float fsr9; + float fsr8; + float fsr11; + float fsr10; + float fsr13; + float fsr12; + float fsr15; + float fsr14; +#endif +} NDS32_FSR16; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; + double dsr4; + double dsr5; + double dsr6; + double dsr7; +} NDS32_DSR8; + +typedef struct +{ +#ifdef __NDS32_EB__ + float fsr0; + float fsr1; + float fsr2; + float fsr3; + float fsr4; + float fsr5; + float fsr6; + float fsr7; + float fsr8; + float fsr9; + float fsr10; + float fsr11; + float fsr12; + float fsr13; + float fsr14; + float fsr15; + float fsr16; + float fsr17; + float fsr18; + float fsr19; + float fsr20; + float fsr21; + float fsr22; + float fsr23; + float fsr24; + float fsr25; + float fsr26; + float fsr27; + float fsr28; + float fsr29; + float fsr30; + float fsr31; +#else + float fsr1; + float fsr0; + float fsr3; + float fsr2; + float fsr5; + float fsr4; + float fsr7; + float 
fsr6; + float fsr9; + float fsr8; + float fsr11; + float fsr10; + float fsr13; + float fsr12; + float fsr15; + float fsr14; + float fsr17; + float fsr16; + float fsr19; + float fsr18; + float fsr21; + float fsr20; + float fsr23; + float fsr22; + float fsr25; + float fsr24; + float fsr27; + float fsr26; + float fsr29; + float fsr28; + float fsr31; + float fsr30; +#endif +} NDS32_FSR32; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; + double dsr4; + double dsr5; + double dsr6; + double dsr7; + double dsr8; + double dsr9; + double dsr10; + double dsr11; + double dsr12; + double dsr13; + double dsr14; + double dsr15; +} NDS32_DSR16; + +typedef struct +{ + double dsr0; + double dsr1; + double dsr2; + double dsr3; + double dsr4; + double dsr5; + double dsr6; + double dsr7; + double dsr8; + double dsr9; + double dsr10; + double dsr11; + double dsr12; + double dsr13; + double dsr14; + double dsr15; + double dsr16; + double dsr17; + double dsr18; + double dsr19; + double dsr20; + double dsr21; + double dsr22; + double dsr23; + double dsr24; + double dsr25; + double dsr26; + double dsr27; + double dsr28; + double dsr29; + double dsr30; + double dsr31; +} NDS32_DSR32; + +typedef struct +{ + union + { + NDS32_FSR8 fsr_s ; + NDS32_DSR4 dsr_s ; + } u ; +} NDS32_FPU8_TAB; + +typedef struct +{ + union + { + NDS32_FSR16 fsr_s ; + NDS32_DSR8 dsr_s ; + } u ; +} NDS32_FPU16_TAB; + +typedef struct +{ + union + { + NDS32_FSR32 fsr_s ; + NDS32_DSR16 dsr_s ; + } u ; +} NDS32_FPU32_TAB; + +typedef struct +{ + union + { + NDS32_FSR32 fsr_s ; + NDS32_DSR32 dsr_s ; + } u ; +} NDS32_FPU64_TAB; + +typedef struct +{ + int ipc; + int ipsw; +#if defined(NDS32_EXT_FPU_CONFIG_0) + NDS32_FPU8_TAB fpr; +#elif defined(NDS32_EXT_FPU_CONFIG_1) + NDS32_FPU16_TAB fpr; +#elif defined(NDS32_EXT_FPU_CONFIG_2) + NDS32_FPU32_TAB fpr; +#elif defined(NDS32_EXT_FPU_CONFIG_3) + NDS32_FPU64_TAB fpr; +#endif +#if __NDS32_DX_REGS__ + NDS32_DX_TAB dxr; +#endif +#if __NDS32_EXT_IFC__ + 
int ifc_lp; + int filler; +#endif +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS + NDS32_REG16_TAB gpr; +#else + NDS32_REG32_TAB gpr; +#endif +} NDS32_CONTEXT; + +/* Predefined Vector Definition. + + For IVIC Mode: 9 to 14 are for hardware interrupt + and 15 is for software interrupt. + For EVIC Mode: 9 to 72 are for hardware interrupt + and software interrupt can be routed to any one of them. + + You may want to define your hardware interrupts in the following way + for easy maintenance. + + IVIC mode: + #define MY_HW_IVIC_TIMER NDS32_VECTOR_INTERRUPT_HW0 + 1 + #define MY_HW_IVIC_USB NDS32_VECTOR_INTERRUPT_HW0 + 3 + EVIC mode: + #define MY_HW_EVIC_DMA NDS32_VECTOR_INTERRUPT_HW0 + 2 + #define MY_HW_EVIC_SWI NDS32_VECTOR_INTERRUPT_HW0 + 10 */ +#define NDS32_VECTOR_RESET 0 +#define NDS32_VECTOR_TLB_FILL 1 +#define NDS32_VECTOR_PTE_NOT_PRESENT 2 +#define NDS32_VECTOR_TLB_MISC 3 +#define NDS32_VECTOR_TLB_VLPT_MISS 4 +#define NDS32_VECTOR_MACHINE_ERROR 5 +#define NDS32_VECTOR_DEBUG_RELATED 6 +#define NDS32_VECTOR_GENERAL_EXCEPTION 7 +#define NDS32_VECTOR_SYSCALL 8 +#define NDS32_VECTOR_INTERRUPT_HW0 9 +#define NDS32_VECTOR_INTERRUPT_HW1 10 +#define NDS32_VECTOR_INTERRUPT_HW2 11 +#define NDS32_VECTOR_INTERRUPT_HW3 12 +#define NDS32_VECTOR_INTERRUPT_HW4 13 +#define NDS32_VECTOR_INTERRUPT_HW5 14 +#define NDS32_VECTOR_INTERRUPT_HW6 15 +#define NDS32_VECTOR_SWI 15 /* THIS IS FOR IVIC MODE ONLY */ +#define NDS32_VECTOR_INTERRUPT_HW7 16 +#define NDS32_VECTOR_INTERRUPT_HW8 17 +#define NDS32_VECTOR_INTERRUPT_HW9 18 +#define NDS32_VECTOR_INTERRUPT_HW10 19 +#define NDS32_VECTOR_INTERRUPT_HW11 20 +#define NDS32_VECTOR_INTERRUPT_HW12 21 +#define NDS32_VECTOR_INTERRUPT_HW13 22 +#define NDS32_VECTOR_INTERRUPT_HW14 23 +#define NDS32_VECTOR_INTERRUPT_HW15 24 +#define NDS32_VECTOR_INTERRUPT_HW16 25 +#define NDS32_VECTOR_INTERRUPT_HW17 26 +#define NDS32_VECTOR_INTERRUPT_HW18 27 +#define NDS32_VECTOR_INTERRUPT_HW19 28 +#define NDS32_VECTOR_INTERRUPT_HW20 29 +#define 
NDS32_VECTOR_INTERRUPT_HW21 30 +#define NDS32_VECTOR_INTERRUPT_HW22 31 +#define NDS32_VECTOR_INTERRUPT_HW23 32 +#define NDS32_VECTOR_INTERRUPT_HW24 33 +#define NDS32_VECTOR_INTERRUPT_HW25 34 +#define NDS32_VECTOR_INTERRUPT_HW26 35 +#define NDS32_VECTOR_INTERRUPT_HW27 36 +#define NDS32_VECTOR_INTERRUPT_HW28 37 +#define NDS32_VECTOR_INTERRUPT_HW29 38 +#define NDS32_VECTOR_INTERRUPT_HW30 39 +#define NDS32_VECTOR_INTERRUPT_HW31 40 +#define NDS32_VECTOR_INTERRUPT_HW32 41 +#define NDS32_VECTOR_INTERRUPT_HW33 42 +#define NDS32_VECTOR_INTERRUPT_HW34 43 +#define NDS32_VECTOR_INTERRUPT_HW35 44 +#define NDS32_VECTOR_INTERRUPT_HW36 45 +#define NDS32_VECTOR_INTERRUPT_HW37 46 +#define NDS32_VECTOR_INTERRUPT_HW38 47 +#define NDS32_VECTOR_INTERRUPT_HW39 48 +#define NDS32_VECTOR_INTERRUPT_HW40 49 +#define NDS32_VECTOR_INTERRUPT_HW41 50 +#define NDS32_VECTOR_INTERRUPT_HW42 51 +#define NDS32_VECTOR_INTERRUPT_HW43 52 +#define NDS32_VECTOR_INTERRUPT_HW44 53 +#define NDS32_VECTOR_INTERRUPT_HW45 54 +#define NDS32_VECTOR_INTERRUPT_HW46 55 +#define NDS32_VECTOR_INTERRUPT_HW47 56 +#define NDS32_VECTOR_INTERRUPT_HW48 57 +#define NDS32_VECTOR_INTERRUPT_HW49 58 +#define NDS32_VECTOR_INTERRUPT_HW50 59 +#define NDS32_VECTOR_INTERRUPT_HW51 60 +#define NDS32_VECTOR_INTERRUPT_HW52 61 +#define NDS32_VECTOR_INTERRUPT_HW53 62 +#define NDS32_VECTOR_INTERRUPT_HW54 63 +#define NDS32_VECTOR_INTERRUPT_HW55 64 +#define NDS32_VECTOR_INTERRUPT_HW56 65 +#define NDS32_VECTOR_INTERRUPT_HW57 66 +#define NDS32_VECTOR_INTERRUPT_HW58 67 +#define NDS32_VECTOR_INTERRUPT_HW59 68 +#define NDS32_VECTOR_INTERRUPT_HW60 69 +#define NDS32_VECTOR_INTERRUPT_HW61 70 +#define NDS32_VECTOR_INTERRUPT_HW62 71 +#define NDS32_VECTOR_INTERRUPT_HW63 72 + +#define NDS32ATTR_RESET(option) __attribute__((reset(option))) +#define NDS32ATTR_EXCEPT(type) __attribute__((exception(type))) +#define NDS32ATTR_EXCEPTION(type) __attribute__((exception(type))) +#define NDS32ATTR_INTERRUPT(type) __attribute__((interrupt(type))) +#define 
NDS32ATTR_ISR(type) __attribute__((interrupt(type))) + +#endif /* nds32_isr.h */ diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md index 3428807..12cd262 100644 --- a/gcc/config/nds32/pipelines.md +++ b/gcc/config/nds32/pipelines.md @@ -44,6 +44,24 @@ ;; ------------------------------------------------------------------------ +;; Include N10 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n10.md") + + +;; ------------------------------------------------------------------------ +;; Include Graywolf pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-graywolf.md") + + +;; ------------------------------------------------------------------------ +;; Include N12/N13 pipeline settings. +;; ------------------------------------------------------------------------ +(include "nds32-n13.md") + + +;; ------------------------------------------------------------------------ ;; Define simple pipeline settings. 
;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md index 9eb8468..ee4cf3c 100644 --- a/gcc/config/nds32/predicates.md +++ b/gcc/config/nds32/predicates.md @@ -40,7 +40,15 @@ (match_code "mult,and,ior,xor")) (define_predicate "nds32_symbolic_operand" - (match_code "const,symbol_ref,label_ref")) + (and (match_code "const,symbol_ref,label_ref") + (match_test "!(TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (op))"))) + +(define_predicate "nds32_nonunspec_symbolic_operand" + (and (match_code "const,symbol_ref,label_ref") + (match_test "!flag_pic && nds32_const_unspec_p (op) + && !(TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (op))"))) (define_predicate "nds32_reg_constant_operand" (ior (match_operand 0 "register_operand") @@ -56,14 +64,51 @@ (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Is11 (op)")))) +(define_predicate "nds32_imm_0_1_operand" + (and (match_operand 0 "const_int_operand") + (ior (match_test "satisfies_constraint_Iv00 (op)") + (match_test "satisfies_constraint_Iv01 (op)")))) + +(define_predicate "nds32_imm_1_2_operand" + (and (match_operand 0 "const_int_operand") + (ior (match_test "satisfies_constraint_Iv01 (op)") + (match_test "satisfies_constraint_Iv02 (op)")))) + +(define_predicate "nds32_imm_1_2_4_8_operand" + (and (match_operand 0 "const_int_operand") + (ior (ior (match_test "satisfies_constraint_Iv01 (op)") + (match_test "satisfies_constraint_Iv02 (op)")) + (ior (match_test "satisfies_constraint_Iv04 (op)") + (match_test "satisfies_constraint_Iv08 (op)"))))) + +(define_predicate "nds32_imm2u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu02 (op)"))) + +(define_predicate "nds32_imm4u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu04 (op)"))) + (define_predicate "nds32_imm5u_operand" (and 
(match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Iu05 (op)"))) +(define_predicate "nds32_imm6u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu06 (op)"))) + +(define_predicate "nds32_rimm4u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm4u_operand"))) + (define_predicate "nds32_rimm5u_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "nds32_imm5u_operand"))) +(define_predicate "nds32_rimm6u_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "nds32_imm6u_operand"))) + (define_predicate "nds32_move_operand" (and (match_operand 0 "general_operand") (not (match_code "high,const,symbol_ref,label_ref"))) @@ -78,6 +123,20 @@ return true; }) +(define_predicate "nds32_vmove_operand" + (and (match_operand 0 "general_operand") + (not (match_code "high,const,symbol_ref,label_ref"))) +{ + /* If the constant op does NOT satisfy Is20 nor Ihig, + we can not perform move behavior by a single instruction. */ + if (GET_CODE (op) == CONST_VECTOR + && !satisfies_constraint_CVs2 (op) + && !satisfies_constraint_CVhi (op)) + return false; + + return true; +}) + (define_predicate "nds32_and_operand" (match_operand 0 "nds32_reg_constant_operand") { @@ -127,6 +186,15 @@ (ior (match_operand 0 "nds32_symbolic_operand") (match_operand 0 "nds32_general_register_operand"))) +(define_predicate "nds32_insv_operand" + (match_code "const_int") +{ + return INTVAL (op) == 0 + || INTVAL (op) == 8 + || INTVAL (op) == 16 + || INTVAL (op) == 24; +}) + (define_predicate "nds32_lmw_smw_base_operand" (and (match_code "mem") (match_test "nds32_valid_smw_lwm_base_p (op)"))) diff --git a/gcc/config/nds32/t-elf b/gcc/config/nds32/t-elf new file mode 100644 index 0000000..3401dae --- /dev/null +++ b/gcc/config/nds32/t-elf @@ -0,0 +1,42 @@ +# The multilib settings of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2018 Free Software Foundation, Inc. 
+# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the +# driver program which options are defaults for this target and thus +# do not need to be handled specially. +MULTILIB_OPTIONS += mcmodel=small/mcmodel=medium/mcmodel=large mvh + +ifneq ($(filter graywolf,$(TM_MULTILIB_CONFIG)),) +MULTILIB_OPTIONS += mcpu=graywolf +endif + +ifneq ($(filter dsp,$(TM_MULTILIB_CONFIG)),) +MULTILIB_OPTIONS += mext-dsp +endif + +ifneq ($(filter zol,$(TM_MULTILIB_CONFIG)),) +MULTILIB_OPTIONS += mext-zol +endif + +ifneq ($(filter v3m+,$(TM_MULTILIB_CONFIG)),) +MULTILIB_OPTIONS += march=v3m+ +endif + +# ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/t-linux b/gcc/config/nds32/t-linux new file mode 100644 index 0000000..33328f6 --- /dev/null +++ b/gcc/config/nds32/t-linux @@ -0,0 +1,26 @@ +# The multilib settings of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2018 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. 
+# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the +# driver program which options are defaults for this target and thus +# do not need to be handled specially. +MULTILIB_OPTIONS += + +# ------------------------------------------------------------------------ diff --git a/gcc/configure b/gcc/configure index 6121e16..07a485e 100755 --- a/gcc/configure +++ b/gcc/configure @@ -27784,7 +27784,7 @@ esac # version to the per-target configury. case "$cpu_type" in aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \ - | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \ + | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu \ | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; diff --git a/gcc/configure.ac b/gcc/configure.ac index b066cc6..ae73df3 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -4910,7 +4910,7 @@ esac # version to the per-target configury. 
case "$cpu_type" in aarch64 | alpha | arc | arm | avr | bfin | cris | i386 | m32c | m68k \ - | microblaze | mips | nios2 | pa | riscv | rs6000 | score | sparc | spu \ + | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | spu \ | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; diff --git a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c index 4eeb8c7..6cd02bc 100644 --- a/gcc/testsuite/gcc.c-torture/execute/20010122-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/20010122-1.c @@ -1,4 +1,5 @@ /* { dg-skip-if "requires frame pointers" { *-*-* } "-fomit-frame-pointer" "" } */ +/* { dg-additional-options "-malways-save-lp" { target nds32*-*-* } } */ /* { dg-require-effective-target return_address } */ extern void exit (int); diff --git a/gcc/testsuite/gcc.dg/lower-subreg-1.c b/gcc/testsuite/gcc.dg/lower-subreg-1.c index 6bae730..4a5099b 100644 --- a/gcc/testsuite/gcc.dg/lower-subreg-1.c +++ b/gcc/testsuite/gcc.dg/lower-subreg-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ +/* { dg-do compile { target { ! { mips64 || { aarch64*-*-* arm*-*-* ia64-*-* nds32*-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ /* { dg-options "-O -fdump-rtl-subreg1" } */ /* { dg-additional-options "-mno-stv" { target ia32 } } */ /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } } */ diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c index 45d2c7b..b9ae9dc 100644 --- a/gcc/testsuite/gcc.dg/stack-usage-1.c +++ b/gcc/testsuite/gcc.dg/stack-usage-1.c @@ -2,6 +2,7 @@ /* { dg-options "-fstack-usage" } */ /* nvptx doesn't have a reg allocator, and hence no stack usage data. */ /* { dg-skip-if "" { nvptx-*-* } } */ +/* { dg-options "-fstack-usage -fno-omit-frame-pointer" { target { nds32*-*-* } } } */ /* This is aimed at testing basic support for -fstack-usage in the back-ends. 
See the SPARC back-end for example (grep flag_stack_usage_info in sparc.c). diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c deleted file mode 100644 index 2dceed9..0000000 --- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-dis.c +++ /dev/null @@ -1,11 +0,0 @@ -/* Verify that we generate setgie.d instruction with builtin function. */ - -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tsetgie.d" } } */ - -void -test (void) -{ - __builtin_nds32_setgie_dis (); -} diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c deleted file mode 100644 index 8928870..0000000 --- a/gcc/testsuite/gcc.target/nds32/builtin-setgie-en.c +++ /dev/null @@ -1,11 +0,0 @@ -/* Verify that we generate setgie.e instruction with builtin function. */ - -/* { dg-do compile } */ -/* { dg-options "-O0" } */ -/* { dg-final { scan-assembler "\\tsetgie.e" } } */ - -void -test (void) -{ - __builtin_nds32_setgie_en (); -} diff --git a/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c new file mode 100644 index 0000000..3b4eede --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/builtin-setgie_mtsr_mfsr.c @@ -0,0 +1,36 @@ +/* This is a test program for checking gie with + mtsr/mfsr instruction. */ + +/* { dg-do run } */ +/* { dg-options "-O0" } */ + +#include <nds32_intrinsic.h> +#include <stdlib.h> + +int +main () +{ + unsigned int psw; + unsigned int gie; + unsigned int pfm_ctl; + + __nds32__setgie_en (); + __nds32__dsb(); /* This is needed for waiting pipeline. */ + psw = __nds32__mfsr (NDS32_SR_PSW); + + gie = psw & 0x00000001; + + if (gie != 1) + abort (); + + psw = psw & 0xFFFFFFFE; + __nds32__mtsr (psw,NDS32_SR_PSW); + __nds32__dsb(); /* This is needed for waiting pipeline. 
*/ + psw = __nds32__mfsr (NDS32_SR_PSW); + gie = psw & 0x00000001; + + if (gie != 0) + abort (); + else + exit (0); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c new file mode 100644 index 0000000..fce90e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending-hw.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +void +main (void) +{ + __nds32__clr_pending_hwint (NDS32_INT_H0); + __nds32__clr_pending_hwint (NDS32_INT_H1); + __nds32__clr_pending_hwint (NDS32_INT_H2); + + __nds32__clr_pending_hwint (NDS32_INT_H15); + __nds32__clr_pending_hwint (NDS32_INT_H16); + __nds32__clr_pending_hwint (NDS32_INT_H31); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c new file mode 100644 index 0000000..08e1dd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-clr-pending.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +void +main (void) +{ + __nds32__clr_pending_swint (); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c new file mode 100644 index 0000000..a3a1f44 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-disable.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +void +main (void) +{ + __nds32__disable_int (NDS32_INT_H15); + __nds32__disable_int (NDS32_INT_H16); + __nds32__disable_int (NDS32_INT_H31); + __nds32__disable_int (NDS32_INT_SWI); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c new file mode 100644 index 0000000..e18ed7a --- /dev/null +++ 
b/gcc/testsuite/gcc.target/nds32/compile/builtin-enable.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { 
dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +void +main (void) +{ + __nds32__enable_int (NDS32_INT_H15); + __nds32__enable_int (NDS32_INT_H16); + __nds32__enable_int (NDS32_INT_H31); + __nds32__enable_int (NDS32_INT_SWI); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c new file mode 100644 index 0000000..4ced0a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-pending-int.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +int +main (void) +{ + int a = __nds32__get_pending_int (NDS32_INT_H15); + int b = __nds32__get_pending_int (NDS32_INT_SWI); + int c = __nds32__get_pending_int (NDS32_INT_H16); + + return a + b + c; +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c new file mode 100644 index 0000000..a394a60 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-get-trig.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +int +main (void) +{ + int a = __nds32__get_trig_type (NDS32_INT_H0); + int b = __nds32__get_trig_type (NDS32_INT_H15); + int c = __nds32__get_trig_type (NDS32_INT_H16); + int d = __nds32__get_trig_type (NDS32_INT_H31); + return a + b + c + d; +} diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isb.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c similarity index 100% rename from gcc/testsuite/gcc.target/nds32/builtin-isb.c rename to gcc/testsuite/gcc.target/nds32/compile/builtin-isb.c diff --git a/gcc/testsuite/gcc.target/nds32/builtin-isync.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c similarity index 100% rename from gcc/testsuite/gcc.target/nds32/builtin-isync.c rename to gcc/testsuite/gcc.target/nds32/compile/builtin-isync.c diff --git 
a/gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c 
b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c similarity index 100% rename from gcc/testsuite/gcc.target/nds32/builtin-mfsr-mtsr.c rename to gcc/testsuite/gcc.target/nds32/compile/builtin-mfsr-mtsr.c diff --git a/gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c similarity index 100% rename from gcc/testsuite/gcc.target/nds32/builtin-mfusr-mtusr.c rename to gcc/testsuite/gcc.target/nds32/compile/builtin-mfusr-mtusr.c diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c new file mode 100644 index 0000000..f10b83d --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-pending.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +int +main (void) +{ + __nds32__set_pending_swint (); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c new file mode 100644 index 0000000..bd8178c --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-edge.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +void +main (void) +{ + __nds32__set_trig_type_edge (NDS32_INT_H0); + __nds32__set_trig_type_edge (NDS32_INT_H15); + __nds32__set_trig_type_edge (NDS32_INT_H16); + __nds32__set_trig_type_edge (NDS32_INT_H31); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c new file mode 100644 index 0000000..1780543 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-set-trig-level.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <nds32_intrinsic.h> + +void +main (void) +{ + __nds32__set_trig_type_level (NDS32_INT_H0); + __nds32__set_trig_type_level 
(NDS32_INT_H15); + __nds32__set_trig_type_level (NDS32_INT_H16); + 
__nds32__set_trig_type_level (NDS32_INT_H31); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c new file mode 100644 index 0000000..e143d3f --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-dis.c @@ -0,0 +1,13 @@ +/* Verify that we generate setgie.d instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tsetgie.d" } } */ + +#include <nds32_intrinsic.h> + +void +test (void) +{ + __nds32__setgie_dis (); +} diff --git a/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c new file mode 100644 index 0000000..ed95782 --- /dev/null +++ b/gcc/testsuite/gcc.target/nds32/compile/builtin-setgie-en.c @@ -0,0 +1,13 @@ +/* Verify that we generate setgie.e instruction with builtin function. */ + +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* { dg-final { scan-assembler "\\tsetgie.e" } } */ + +#include <nds32_intrinsic.h> + +void +test (void) +{ + __nds32__setgie_en (); +} diff --git a/gcc/testsuite/gcc.target/nds32/nds32.exp b/gcc/testsuite/gcc.target/nds32/nds32.exp index 44ce72d..2f1bff6 100644 --- a/gcc/testsuite/gcc.target/nds32/nds32.exp +++ b/gcc/testsuite/gcc.target/nds32/nds32.exp @@ -38,8 +38,10 @@ if ![info exists DEFAULT_CFLAGS] then { dg-init # Main loop. -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/compile/*.\[cS\]]] \ "" $DEFAULT_CFLAGS +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ + "" "" # All done. 
dg-finish diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 50665df..fbf7998 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8767,6 +8767,7 @@ proc check_effective_target_logical_op_short_circuit {} { || [istarget avr*-*-*] || [istarget crisv32-*-*] || [istarget cris-*-*] || [istarget mmix-*-*] + || [istarget nds32*-*-*] || [istarget s390*-*-*] || [istarget powerpc*-*-*] || [istarget nios2*-*-*] diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 138d72d..69dff41 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,8 @@ +2018-04-25 Chung-Ju Wu + + * config/nds32/sfp-machine.h: Fix settings for NDS32_ABI_2FP_PLUS. + * config/nds32/t-nds32-newlib (HOST_LIBGCC2_CFLAGS): Use -fwrapv. + 2018-05-02 Release Manager * GCC 8.1.0 released. diff --git a/libgcc/config.host b/libgcc/config.host index 11b4aca..fbbc921 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -974,6 +974,23 @@ msp430*-*-elf) tmake_file="$tm_file t-crtstuff t-fdpbit msp430/t-msp430" extra_parts="$extra_parts libmul_none.a libmul_16.a libmul_32.a libmul_f5.a" ;; +nds32*-linux*) + # Basic makefile fragment and extra_parts for crt stuff. + # We also append c-isr library implementation. + tmake_file="${tmake_file} t-slibgcc-libgcc" + tmake_file="${tmake_file} nds32/t-nds32-glibc nds32/t-crtstuff t-softfp-sfdf t-softfp" + # The header file of defining MD_FALLBACK_FRAME_STATE_FOR. + md_unwind_header=nds32/linux-unwind.h + # Append library definition makefile fragment according to --with-nds32-lib=X setting. + case "${with_nds32_lib}" in + "" | glibc | uclibc ) + ;; + *) + echo "Cannot accept --with-nds32-lib=$with_nds32_lib, available values are: glibc uclibc" 1>&2 + exit 1 + ;; + esac + ;; nds32*-elf*) # Basic makefile fragment and extra_parts for crt stuff. # We also append c-isr library implementation. 
diff --git a/libgcc/config/nds32/initfini.c b/libgcc/config/nds32/initfini.c index 49ca44f..dfbcc43 100644 --- a/libgcc/config/nds32/initfini.c +++ b/libgcc/config/nds32/initfini.c @@ -25,6 +25,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ +#include <stddef.h> +/* Need header file for `struct object' type. */ +#include "../libgcc/unwind-dw2-fde.h" + /* Declare a pointer to void function type. */ typedef void (*func_ptr) (void); @@ -42,11 +46,59 @@ typedef void (*func_ptr) (void); refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__ symbol in crtinit.o, where they are defined. */ -static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"))) - = { (func_ptr) (-1) }; +static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors"), used)) + = { (func_ptr) 0 }; + +static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"), used)) + = { (func_ptr) 0 }; + + +#ifdef SUPPORT_UNWINDING_DWARF2 +/* Preparation of exception handling with dwarf2 mechanism registration. */ -static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) - = { (func_ptr) (-1) }; +asm ("\n\ + .section .eh_frame,\"aw\",@progbits\n\ + .global __EH_FRAME_BEGIN__\n\ + .type __EH_FRAME_BEGIN__, @object\n\ + .align 2\n\ +__EH_FRAME_BEGIN__:\n\ + ! Beginning location of eh_frame section\n\ + .previous\n\ +"); + +extern func_ptr __EH_FRAME_BEGIN__[]; + + +/* Note that the following two functions are going to be chained into + constructor and destructor list, respectively. So these two declarations + must be placed after __CTOR_LIST__ and __DTOR_LIST__. */ +extern void __nds32_register_eh(void) __attribute__((constructor, used)); +extern void __nds32_deregister_eh(void) __attribute__((destructor, used)); + +/* Register the exception handling table as the first constructor. 
*/ +void +__nds32_register_eh (void) +{ + static struct object object; + if (__register_frame_info) + __register_frame_info (__EH_FRAME_BEGIN__, &object); +} + +/* Unregister the exception handling table as a deconstructor. */ +void +__nds32_deregister_eh (void) +{ + static int completed = 0; + + if (completed) + return; + + if (__deregister_frame_info) + __deregister_frame_info (__EH_FRAME_BEGIN__); + + completed = 1; +} +#endif /* Run all the global destructors on exit from the program. */ @@ -63,7 +115,7 @@ static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors"))) same particular root executable or shared library file. */ static void __do_global_dtors (void) -asm ("__do_global_dtors") __attribute__ ((section (".text"))); +asm ("__do_global_dtors") __attribute__ ((section (".text"), used)); static void __do_global_dtors (void) @@ -116,23 +168,37 @@ void *__dso_handle = 0; last, these words naturally end up at the very ends of the two lists contained in these two sections. */ -static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"))) +static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors"), used)) = { (func_ptr) 0 }; -static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"))) +static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors"), used)) = { (func_ptr) 0 }; +#ifdef SUPPORT_UNWINDING_DWARF2 +/* ZERO terminator in .eh_frame section. */ +asm ("\n\ + .section .eh_frame,\"aw\",@progbits\n\ + .global __EH_FRAME_END__\n\ + .type __EH_FRAME_END__, @object\n\ + .align 2\n\ +__EH_FRAME_END__:\n\ + ! End location of eh_frame section with ZERO terminator\n\ + .word 0\n\ + .previous\n\ +"); +#endif + /* Run all global constructors for the program. Note that they are run in reverse order. 
*/ +static void __do_global_ctors (void) -asm ("__do_global_ctors") __attribute__ ((section (".text"))); +asm ("__do_global_ctors") __attribute__ ((section (".text"), used)); static void __do_global_ctors (void) { func_ptr *p; - for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--) + for (p = __CTOR_END__ - 1; *p; p--) (*p) (); } diff --git a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc index 5cc1a6f..275e558 100644 --- a/libgcc/config/nds32/isr-library/adj_intr_lvl.inc +++ b/libgcc/config/nds32/isr-library/adj_intr_lvl.inc @@ -26,13 +26,26 @@ .macro ADJ_INTR_LVL #if defined(NDS32_NESTED) /* Nested handler. */ mfsr $r3, $PSW + /* By subtracting 1 from $PSW, we can lower PSW.INTL + and enable GIE simultaneously. */ addi $r3, $r3, #-0x1 + #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ + #endif mtsr $r3, $PSW #elif defined(NDS32_NESTED_READY) /* Nested ready handler. */ /* Save ipc and ipsw and lower INT level. */ mfsr $r3, $PSW addi $r3, $r3, #-0x2 + #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ + #endif mtsr $r3, $PSW #else /* Not nested handler. */ + #if __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + mfsr $r3, $PSW + ori $r3, $r3, 0x2000 /* Set PSW.AEN(b'13) */ + mtsr $r3, $PSW + #endif #endif .endm diff --git a/libgcc/config/nds32/isr-library/excp_isr.S b/libgcc/config/nds32/isr-library/excp_isr.S index f24f856..6e7de5f 100644 --- a/libgcc/config/nds32/isr-library/excp_isr.S +++ b/libgcc/config/nds32/isr-library/excp_isr.S @@ -23,6 +23,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
*/ +#include "save_usr_regs.inc" #include "save_mac_regs.inc" #include "save_fpu_regs.inc" #include "save_fpu_regs_00.inc" @@ -32,35 +33,33 @@ #include "save_all.inc" #include "save_partial.inc" #include "adj_intr_lvl.inc" -#include "restore_mac_regs.inc" #include "restore_fpu_regs_00.inc" #include "restore_fpu_regs_01.inc" #include "restore_fpu_regs_02.inc" #include "restore_fpu_regs_03.inc" #include "restore_fpu_regs.inc" +#include "restore_mac_regs.inc" +#include "restore_usr_regs.inc" #include "restore_all.inc" #include "restore_partial.inc" + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ .align 1 -/* - First Level Handlers - 1. First Level Handlers are invokded in vector section via jump instruction - with specific names for different configurations. - 2. Naming Format: _nds32_e_SR_NT for exception handlers. - _nds32_i_SR_NT for interrupt handlers. - 2.1 All upper case letters are replaced with specific lower case letters encodings. - 2.2 SR: Saved Registers - sa: Save All regs (context) - ps: Partial Save (all caller-saved regs) - 2.3 NT: Nested Type - ns: nested - nn: not nested - nr: nested ready -*/ - -/* - This is original 16-byte vector size version. -*/ + +/* First Level Handlers + 1. First Level Handlers are invokded in vector section via jump instruction + with specific names for different configurations. + 2. Naming Format: _nds32_e_SR_NT for exception handlers. + _nds32_i_SR_NT for interrupt handlers. + 2.1 All upper case letters are replaced with specific lower case letters encodings. + 2.2 SR -- Saved Registers + sa: Save All regs (context) + ps: Partial Save (all caller-saved regs) + 2.3 NT -- Nested Type + ns: nested + nn: not nested + nr: nested ready */ + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .globl _nds32_e_sa_ns @@ -91,21 +90,26 @@ _nds32_e_ps_nn: #endif /* endif for Nest Type */ #endif /* not NDS32_SAVE_ALL_REGS */ -/* - This is 16-byte vector size version. 
- The vector id was restored into $r0 in vector by compiler. -*/ + +/* For 4-byte vector size version, the vector id is + extracted from $ITYPE and is set into $r0 by library. + For 16-byte vector size version, the vector id + is set into $r0 in vector section by compiler. */ + +/* Save used registers. */ #ifdef NDS32_SAVE_ALL_REGS SAVE_ALL #else SAVE_PARTIAL #endif + /* Prepare to call 2nd level handler. */ la $r2, _nds32_jmptbl_00 lw $r2, [$r2 + $r0 << #2] ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ jral $r2 - /* Restore used registers. */ + +/* Restore used registers. */ #ifdef NDS32_SAVE_ALL_REGS RESTORE_ALL #else @@ -113,6 +117,7 @@ _nds32_e_ps_nn: #endif iret + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .size _nds32_e_sa_ns, .-_nds32_e_sa_ns diff --git a/libgcc/config/nds32/isr-library/intr_isr.S b/libgcc/config/nds32/isr-library/intr_isr.S index 0431ac1..23ffa10 100644 --- a/libgcc/config/nds32/isr-library/intr_isr.S +++ b/libgcc/config/nds32/isr-library/intr_isr.S @@ -23,6 +23,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ +#include "save_usr_regs.inc" #include "save_mac_regs.inc" #include "save_fpu_regs.inc" #include "save_fpu_regs_00.inc" @@ -32,35 +33,33 @@ #include "save_all.inc" #include "save_partial.inc" #include "adj_intr_lvl.inc" -#include "restore_mac_regs.inc" #include "restore_fpu_regs_00.inc" #include "restore_fpu_regs_01.inc" #include "restore_fpu_regs_02.inc" #include "restore_fpu_regs_03.inc" #include "restore_fpu_regs.inc" +#include "restore_mac_regs.inc" +#include "restore_usr_regs.inc" #include "restore_all.inc" #include "restore_partial.inc" + .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ .align 1 -/* - First Level Handlers - 1. First Level Handlers are invokded in vector section via jump instruction - with specific names for different configurations. - 2. Naming Format: _nds32_e_SR_NT for exception handlers. - _nds32_i_SR_NT for interrupt handlers. 
- 2.1 All upper case letters are replaced with specific lower case letters encodings. - 2.2 SR: Saved Registers - sa: Save All regs (context) - ps: Partial Save (all caller-saved regs) - 2.3 NT: Nested Type - ns: nested - nn: not nested - nr: nested ready -*/ - -/* - This is original 16-byte vector size version. -*/ + +/* First Level Handlers + 1. First Level Handlers are invoked in vector section via jump instruction + with specific names for different configurations. + 2. Naming Format: _nds32_e_SR_NT for exception handlers. + _nds32_i_SR_NT for interrupt handlers. + 2.1 All upper case letters are replaced with specific lower case letters encodings. + 2.2 SR -- Saved Registers + sa: Save All regs (context) + ps: Partial Save (all caller-saved regs) + 2.3 NT -- Nested Type + ns: nested + nn: not nested + nr: nested ready */ + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .globl _nds32_i_sa_ns @@ -91,21 +90,36 @@ _nds32_i_ps_nn: #endif /* endif for Nest Type */ #endif /* not NDS32_SAVE_ALL_REGS */ -/* - This is 16-byte vector size version. - The vector id was restored into $r0 in vector by compiler. -*/ + +/* For 4-byte vector size version, the vector id is + extracted from $ITYPE and is set into $r0 by library. + For 16-byte vector size version, the vector id + is set into $r0 in vector section by compiler. */ + +/* Save used registers first. */ #ifdef NDS32_SAVE_ALL_REGS SAVE_ALL #else SAVE_PARTIAL #endif - /* Prepare to call 2nd level handler. */ + +/* According to vector size, we need to have different implementation. */ +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* Prepare to call 2nd level handler. */ + la $r2, _nds32_jmptbl_00 + lw $r2, [$r2 + $r0 << #2] + addi $r0, $r0, #-9 /* Make interrupt vector id zero-based. */ + ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ + jral $r2 +#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + /* Prepare to call 2nd level handler. */ la $r2, _nds32_jmptbl_09 /* For zero-based vcetor id. 
*/ lw $r2, [$r2 + $r0 << #2] ADJ_INTR_LVL /* Adjust INTR level. $r3 is clobbered. */ jral $r2 - /* Restore used registers. */ +#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + +/* Restore used registers. */ #ifdef NDS32_SAVE_ALL_REGS RESTORE_ALL #else @@ -113,6 +127,7 @@ _nds32_i_ps_nn: #endif iret + #ifdef NDS32_SAVE_ALL_REGS #if defined(NDS32_NESTED) .size _nds32_i_sa_ns, .-_nds32_i_sa_ns diff --git a/libgcc/config/nds32/isr-library/reset.S b/libgcc/config/nds32/isr-library/reset.S index 78abeb2..2ac247e 100644 --- a/libgcc/config/nds32/isr-library/reset.S +++ b/libgcc/config/nds32/isr-library/reset.S @@ -26,22 +26,18 @@ .section .nds32_isr, "ax" /* Put it in the section of 1st level handler. */ .align 1 .weak _SDA_BASE_ /* For reset handler only. */ - .weak _FP_BASE_ /* For reset handler only. */ .weak _nds32_init_mem /* User defined memory initialization function. */ .globl _start .globl _nds32_reset .type _nds32_reset, @function _nds32_reset: _start: -#ifdef NDS32_EXT_EX9 - .no_ex9_begin -#endif /* Handle NMI and warm boot if any of them exists. */ beqz $sp, 1f /* Reset, NMI or warm boot? */ /* Either NMI or warm boot; save all regs. */ /* Preserve registers for context-switching. */ -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS /* For 16-reg mode. */ smw.adm $r0, [$sp], $r10, #0x0 smw.adm $r15, [$sp], $r15, #0xf @@ -49,10 +45,9 @@ _start: /* For 32-reg mode. */ smw.adm $r0, [$sp], $r27, #0xf #endif -#ifdef NDS32_EXT_IFC +#if __NDS32_EXT_IFC__ mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ + smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte alignment. */ #endif la $gp, _SDA_BASE_ /* Init GP for small data access. */ @@ -71,12 +66,11 @@ _start: bnez $r0, 1f /* If fail to resume, do cold boot. */ /* Restore registers for context-switching. */ -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. 
*/ +#if __NDS32_EXT_IFC__ + lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep stack 8-byte alignment. */ mtusr $r1, $IFC_LP #endif -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS /* For 16-reg mode. */ lmw.bim $r15, [$sp], $r15, #0xf lmw.bim $r0, [$sp], $r10, #0x0 @@ -88,6 +82,17 @@ _start: 1: /* Cold boot. */ +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* With vector ID feature for v3 architecture, default vector size is 4-byte. */ + /* Set IVB.ESZ = 0 (vector table entry size = 4 bytes) */ + mfsr $r0, $IVB + li $r1, #0xc000 + or $r0, $r0, $r1 + xor $r0, $r0, $r1 + mtsr $r0, $IVB + dsb +#else + /* There is no vector ID feature, so the vector size must be 16-byte. */ /* Set IVB.ESZ = 1 (vector table entry size = 16 bytes) */ mfsr $r0, $IVB li $r1, #0xffff3fff @@ -95,36 +100,54 @@ _start: ori $r0, $r0, #0x4000 mtsr $r0, $IVB dsb +#endif la $gp, _SDA_BASE_ /* Init $gp. */ - la $fp, _FP_BASE_ /* Init $fp. */ la $sp, _stack /* Init $sp. */ -#ifdef NDS32_EXT_EX9 -/* - * Initialize the table base of EX9 instruction - * ex9 generation needs to disable before the ITB is set - */ - mfsr $r0, $MSC_CFG /* Check if HW support of EX9. */ + +#if __NDS32_EXT_EX9__ +.L_init_itb: + /* Initialization for Instruction Table Base (ITB). + The symbol _ITB_BASE_ is determined by Linker. + Set $ITB only if MSC_CFG.EIT (cr4.b'24) is set. */ + mfsr $r0, $MSC_CFG srli $r0, $r0, 24 andi $r0, $r0, 0x1 - beqz $r0, 4f /* Zero means HW does not support EX9. */ - la $r0, _ITB_BASE_ /* Init $ITB. */ + beqz $r0, 4f /* Fall through ? */ + la $r0, _ITB_BASE_ mtusr $r0, $ITB - .no_ex9_end 4: #endif - la $r15, _nds32_init_mem /* Call DRAM init. _nds32_init_mem - may written by C language. */ + +#if __NDS32_EXT_FPU_SP__ || __NDS32_EXT_FPU_DP__ +.L_init_fpu: + /* Initialize FPU + Set FUCOP_CTL.CP0EN (fucpr.b'0). */ + mfsr $r0, $FUCOP_CTL + ori $r0, $r0, 0x1 + mtsr $r0, $FUCOP_CTL + dsb + /* According to [bugzilla #9425], set flush-to-zero mode. 
+ That is, set $FPCSR.DNZ(b'12) = 1. */ + FMFCSR $r0 + ori $r0, $r0, 0x1000 + FMTCSR $r0 + dsb +#endif + + /* Call DRAM init. _nds32_init_mem may written by C language. */ + la $r15, _nds32_init_mem beqz $r15, 6f jral $r15 6: l.w $r15, _nds32_jmptbl_00 /* Load reset handler. */ jral $r15 -/* Reset handler() should never return in a RTOS or non-OS system. - In case it does return, an exception will be generated. - This exception will be caught either by default break handler or by EDM. - Default break handle may just do an infinite loop. - EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ + + /* Reset handler() should never return in a RTOS or non-OS system. + In case it does return, an exception will be generated. + This exception will be caught either by default break handler or by EDM. + Default break handle may just do an infinite loop. + EDM will notify GDB and GDB will regain control when the ID is 0x7fff. */ 5: break #0x7fff .size _nds32_reset, .-_nds32_reset diff --git a/libgcc/config/nds32/isr-library/restore_all.inc b/libgcc/config/nds32/isr-library/restore_all.inc index 7455646..23cdf8c 100644 --- a/libgcc/config/nds32/isr-library/restore_all.inc +++ b/libgcc/config/nds32/isr-library/restore_all.inc @@ -31,15 +31,11 @@ mtsr $r2, $IPSW RESTORE_FPU_REGS RESTORE_MAC_REGS -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. */ - mtusr $r1, $IFC_LP -#endif -#ifdef __NDS32_REDUCED_REGS__ + RESTORE_USR_REGS +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS lmw.bim $r0, [$sp], $r10, #0x0 /* Restore all regs. */ lmw.bim $r15, [$sp], $r15, #0xf -#else /* not __NDS32_REDUCED_REGS__ */ +#else lmw.bim $r0, [$sp], $r27, #0xf /* Restore all regs. 
*/ #endif .endm diff --git a/libgcc/config/nds32/isr-library/restore_mac_regs.inc b/libgcc/config/nds32/isr-library/restore_mac_regs.inc index 1e6aac6..a434083 100644 --- a/libgcc/config/nds32/isr-library/restore_mac_regs.inc +++ b/libgcc/config/nds32/isr-library/restore_mac_regs.inc @@ -24,7 +24,7 @@ . */ .macro RESTORE_MAC_REGS -#ifdef NDS32_DX_REGS +#if __NDS32_DX_REGS__ lmw.bim $r1, [$sp], $r4, #0x0 mtusr $r1, $d0.lo mtusr $r2, $d0.hi diff --git a/libgcc/config/nds32/isr-library/restore_partial.inc b/libgcc/config/nds32/isr-library/restore_partial.inc index d406a99..c43ad16 100644 --- a/libgcc/config/nds32/isr-library/restore_partial.inc +++ b/libgcc/config/nds32/isr-library/restore_partial.inc @@ -31,15 +31,11 @@ mtsr $r1, $IPC /* Set IPC. */ mtsr $r2, $IPSW /* Set IPSW. */ #endif - RESTORE_FPU_REGS - RESTORE_MAC_REGS -#ifdef NDS32_EXT_IFC - lmw.bim $r1, [$sp], $r2, #0x0 /* Restore extra $r2 to keep - stack 8-byte alignment. */ - mtusr $r1, $IFC_LP -#endif + RESTORE_FPU_REGS + RESTORE_MAC_REGS + RESTORE_USR_REGS lmw.bim $r0, [$sp], $r5, #0x0 /* Restore all regs. */ -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS lmw.bim $r15, [$sp], $r15, #0x2 #else lmw.bim $r15, [$sp], $r27, #0x2 /* Restore all regs. */ diff --git a/libgcc/config/nds32/isr-library/restore_usr_regs.inc b/libgcc/config/nds32/isr-library/restore_usr_regs.inc new file mode 100644 index 0000000..9602c74 --- /dev/null +++ b/libgcc/config/nds32/isr-library/restore_usr_regs.inc @@ -0,0 +1,42 @@ +/* c-isr library stuff of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2018 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +.macro RESTORE_USR_REGS +#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) + lmw.bim $r1, [$sp], $r4, #0x0 + mtusr $r1, $IFC_LP + mtusr $r2, $LB + mtusr $r3, $LE + mtusr $r4, $LC +#elif __NDS32_EXT_IFC__ + lmw.bim $r1, [$sp], $r2, #0x0 + mtusr $r1, $IFC_LP +#elif __NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__ + lmw.bim $r1, [$sp], $r4, #0x0 + mtusr $r1, $LB + mtusr $r2, $LE + mtusr $r3, $LC +#endif +.endm diff --git a/libgcc/config/nds32/isr-library/save_all.inc b/libgcc/config/nds32/isr-library/save_all.inc index fa08b39..8886edb 100644 --- a/libgcc/config/nds32/isr-library/save_all.inc +++ b/libgcc/config/nds32/isr-library/save_all.inc @@ -23,45 +23,42 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ -.macro SAVE_ALL_4B -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_ISR_VECTOR_SIZE_4__ + +/* If vector size is 4-byte, we have to save registers + in the macro implementation. */ +.macro SAVE_ALL +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS smw.adm $r15, [$sp], $r15, #0xf smw.adm $r0, [$sp], $r10, #0x0 -#else /* not __NDS32_REDUCED_REGS__ */ +#else smw.adm $r0, [$sp], $r27, #0xf -#endif /* not __NDS32_REDUCED_REGS__ */ -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. 
*/ #endif - SAVE_MAC_REGS - SAVE_FPU_REGS + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ mfsr $r0, $ITYPE /* Get VID to $r0. */ srli $r0, $r0, #5 -#ifdef __NDS32_ISA_V2__ andi $r0, $r0, #127 -#else - fexti33 $r0, #6 -#endif .endm +#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + +/* If vector size is 16-byte, some works can be done in + the vector section generated by compiler, so that we + can implement less in the macro. */ .macro SAVE_ALL -/* SAVE_REG_TBL code has been moved to - vector table generated by compiler. */ -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ -#endif - SAVE_MAC_REGS - SAVE_FPU_REGS + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. */ move $r1, $sp /* $r1 is ptr to NDS32_CONTEXT. */ .endm + +#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ diff --git a/libgcc/config/nds32/isr-library/save_mac_regs.inc b/libgcc/config/nds32/isr-library/save_mac_regs.inc index ff120e8..a6a9230 100644 --- a/libgcc/config/nds32/isr-library/save_mac_regs.inc +++ b/libgcc/config/nds32/isr-library/save_mac_regs.inc @@ -24,7 +24,7 @@ . */ .macro SAVE_MAC_REGS -#ifdef NDS32_DX_REGS +#if __NDS32_DX_REGS__ mfusr $r1, $d0.lo mfusr $r2, $d0.hi mfusr $r3, $d1.lo diff --git a/libgcc/config/nds32/isr-library/save_partial.inc b/libgcc/config/nds32/isr-library/save_partial.inc index 2445e48..c81ebaa 100644 --- a/libgcc/config/nds32/isr-library/save_partial.inc +++ b/libgcc/config/nds32/isr-library/save_partial.inc @@ -23,20 +23,20 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
*/ -.macro SAVE_PARTIAL_4B -#ifdef __NDS32_REDUCED_REGS__ +#if __NDS32_ISR_VECTOR_SIZE_4__ + +/* If vector size is 4-byte, we have to save registers + in the macro implementation. */ +.macro SAVE_PARTIAL +#if __NDS32_REDUCED_REGS__ || __NDS32_REDUCE_REGS smw.adm $r15, [$sp], $r15, #0x2 -#else /* not __NDS32_REDUCED_REGS__ */ +#else smw.adm $r15, [$sp], $r27, #0x2 -#endif /* not __NDS32_REDUCED_REGS__ */ - smw.adm $r0, [$sp], $r5, #0x0 -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ #endif - SAVE_MAC_REGS - SAVE_FPU_REGS + smw.adm $r0, [$sp], $r5, #0x0 + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ @@ -44,26 +44,24 @@ #endif mfsr $r0, $ITYPE /* Get VID to $r0. */ srli $r0, $r0, #5 -#ifdef __NDS32_ISA_V2__ andi $r0, $r0, #127 -#else - fexti33 $r0, #6 -#endif .endm +#else /* not __NDS32_ISR_VECTOR_SIZE_4__ */ + +/* If vector size is 16-byte, some works can be done in + the vector section generated by compiler, so that we + can implement less in the macro. */ + .macro SAVE_PARTIAL -/* SAVE_CALLER_REGS code has been moved to - vector table generated by compiler. */ -#ifdef NDS32_EXT_IFC - mfusr $r1, $IFC_LP - smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep - stack 8-byte alignment. */ -#endif - SAVE_MAC_REGS - SAVE_FPU_REGS + SAVE_USR_REGS + SAVE_MAC_REGS + SAVE_FPU_REGS #if defined(NDS32_NESTED) || defined(NDS32_NESTED_READY) mfsr $r1, $IPC /* Get IPC. */ mfsr $r2, $IPSW /* Get IPSW. */ smw.adm $r1, [$sp], $r2, #0x0 /* Push IPC, IPSW. 
*/ #endif .endm + +#endif /* not __NDS32_ISR_VECTOR_SIZE_4__ */ diff --git a/libgcc/config/nds32/isr-library/save_usr_regs.inc b/libgcc/config/nds32/isr-library/save_usr_regs.inc new file mode 100644 index 0000000..5a3f618 --- /dev/null +++ b/libgcc/config/nds32/isr-library/save_usr_regs.inc @@ -0,0 +1,44 @@ +/* c-isr library stuff of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2018 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +.macro SAVE_USR_REGS +/* Store User Special Registers according to supported ISA extension + !!! WATCH OUT !!! Take care of 8-byte alignment issue. */ +#if __NDS32_EXT_IFC__ && (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) + mfusr $r1, $IFC_LP + mfusr $r2, $LB + mfusr $r3, $LE + mfusr $r4, $LC + smw.adm $r1, [$sp], $r4, #0x0 /* Save even. Ok! */ +#elif __NDS32_EXT_IFC__ + mfusr $r1, $IFC_LP + smw.adm $r1, [$sp], $r2, #0x0 /* Save extra $r2 to keep stack 8-byte aligned. 
*/ +#elif (__NDS32_EXT_ZOL__ || __NDS32_EXT_DSP__) + mfusr $r1, $LB + mfusr $r2, $LE + mfusr $r3, $LC + smw.adm $r1, [$sp], $r4, #0x0 /* Save extra $r4 to keep stack 8-byte aligned. */ +#endif +.endm diff --git a/libgcc/config/nds32/isr-library/vec_vid00.S b/libgcc/config/nds32/isr-library/vec_vid00.S index b2a645c..643009e 100644 --- a/libgcc/config/nds32/isr-library/vec_vid00.S +++ b/libgcc/config/nds32/isr-library/vec_vid00.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.00, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_00 .type _nds32_vector_00, @function _nds32_vector_00: diff --git a/libgcc/config/nds32/isr-library/vec_vid01.S b/libgcc/config/nds32/isr-library/vec_vid01.S index 9e796c7..fd9bc8b 100644 --- a/libgcc/config/nds32/isr-library/vec_vid01.S +++ b/libgcc/config/nds32/isr-library/vec_vid01.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.01, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_01 .type _nds32_vector_01, @function _nds32_vector_01: diff --git a/libgcc/config/nds32/isr-library/vec_vid02.S b/libgcc/config/nds32/isr-library/vec_vid02.S index a6b34b7..c5a8843 100644 --- a/libgcc/config/nds32/isr-library/vec_vid02.S +++ b/libgcc/config/nds32/isr-library/vec_vid02.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.02, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_02 .type _nds32_vector_02, @function _nds32_vector_02: diff --git a/libgcc/config/nds32/isr-library/vec_vid03.S b/libgcc/config/nds32/isr-library/vec_vid03.S index 680f6d9..7f11fb9 100644 --- a/libgcc/config/nds32/isr-library/vec_vid03.S +++ b/libgcc/config/nds32/isr-library/vec_vid03.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.03, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_03 .type _nds32_vector_03, @function _nds32_vector_03: diff --git a/libgcc/config/nds32/isr-library/vec_vid04.S b/libgcc/config/nds32/isr-library/vec_vid04.S index f0b616c..de2e249 100644 --- a/libgcc/config/nds32/isr-library/vec_vid04.S +++ b/libgcc/config/nds32/isr-library/vec_vid04.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.04, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_04 .type _nds32_vector_04, @function _nds32_vector_04: diff --git a/libgcc/config/nds32/isr-library/vec_vid05.S b/libgcc/config/nds32/isr-library/vec_vid05.S index 47cbcea..62e1cda 100644 --- a/libgcc/config/nds32/isr-library/vec_vid05.S +++ b/libgcc/config/nds32/isr-library/vec_vid05.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.05, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_05 .type _nds32_vector_05, @function _nds32_vector_05: diff --git a/libgcc/config/nds32/isr-library/vec_vid06.S b/libgcc/config/nds32/isr-library/vec_vid06.S index 851836c..e41a60c 100644 --- a/libgcc/config/nds32/isr-library/vec_vid06.S +++ b/libgcc/config/nds32/isr-library/vec_vid06.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.06, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_06 .type _nds32_vector_06, @function _nds32_vector_06: diff --git a/libgcc/config/nds32/isr-library/vec_vid07.S b/libgcc/config/nds32/isr-library/vec_vid07.S index 664ee0c..b5447a8 100644 --- a/libgcc/config/nds32/isr-library/vec_vid07.S +++ b/libgcc/config/nds32/isr-library/vec_vid07.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.07, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_07 .type _nds32_vector_07, @function _nds32_vector_07: diff --git a/libgcc/config/nds32/isr-library/vec_vid08.S b/libgcc/config/nds32/isr-library/vec_vid08.S index 1b5534c..2c07dd3 100644 --- a/libgcc/config/nds32/isr-library/vec_vid08.S +++ b/libgcc/config/nds32/isr-library/vec_vid08.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.08, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_08 .type _nds32_vector_08, @function _nds32_vector_08: diff --git a/libgcc/config/nds32/isr-library/vec_vid09.S b/libgcc/config/nds32/isr-library/vec_vid09.S index 81a5675..e858cea 100644 --- a/libgcc/config/nds32/isr-library/vec_vid09.S +++ b/libgcc/config/nds32/isr-library/vec_vid09.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.09, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_09 .type _nds32_vector_09, @function _nds32_vector_09: diff --git a/libgcc/config/nds32/isr-library/vec_vid10.S b/libgcc/config/nds32/isr-library/vec_vid10.S index 102f7cf..e8bbc0b 100644 --- a/libgcc/config/nds32/isr-library/vec_vid10.S +++ b/libgcc/config/nds32/isr-library/vec_vid10.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.10, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_10 .type _nds32_vector_10, @function _nds32_vector_10: diff --git a/libgcc/config/nds32/isr-library/vec_vid11.S b/libgcc/config/nds32/isr-library/vec_vid11.S index ade2ee5..92aebb4 100644 --- a/libgcc/config/nds32/isr-library/vec_vid11.S +++ b/libgcc/config/nds32/isr-library/vec_vid11.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.11, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_11 .type _nds32_vector_11, @function _nds32_vector_11: diff --git a/libgcc/config/nds32/isr-library/vec_vid12.S b/libgcc/config/nds32/isr-library/vec_vid12.S index a595811..6fd050a 100644 --- a/libgcc/config/nds32/isr-library/vec_vid12.S +++ b/libgcc/config/nds32/isr-library/vec_vid12.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.12, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_12 .type _nds32_vector_12, @function _nds32_vector_12: diff --git a/libgcc/config/nds32/isr-library/vec_vid13.S b/libgcc/config/nds32/isr-library/vec_vid13.S index 55863be..0a45c45 100644 --- a/libgcc/config/nds32/isr-library/vec_vid13.S +++ b/libgcc/config/nds32/isr-library/vec_vid13.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.13, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_13 .type _nds32_vector_13, @function _nds32_vector_13: diff --git a/libgcc/config/nds32/isr-library/vec_vid14.S b/libgcc/config/nds32/isr-library/vec_vid14.S index abe7f42..837b848 100644 --- a/libgcc/config/nds32/isr-library/vec_vid14.S +++ b/libgcc/config/nds32/isr-library/vec_vid14.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.14, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_14 .type _nds32_vector_14, @function _nds32_vector_14: diff --git a/libgcc/config/nds32/isr-library/vec_vid15.S b/libgcc/config/nds32/isr-library/vec_vid15.S index 890819f..c639aa4 100644 --- a/libgcc/config/nds32/isr-library/vec_vid15.S +++ b/libgcc/config/nds32/isr-library/vec_vid15.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.15, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_15 .type _nds32_vector_15, @function _nds32_vector_15: diff --git a/libgcc/config/nds32/isr-library/vec_vid16.S b/libgcc/config/nds32/isr-library/vec_vid16.S index 20db625..a762130 100644 --- a/libgcc/config/nds32/isr-library/vec_vid16.S +++ b/libgcc/config/nds32/isr-library/vec_vid16.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.16, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_16 .type _nds32_vector_16, @function _nds32_vector_16: diff --git a/libgcc/config/nds32/isr-library/vec_vid17.S b/libgcc/config/nds32/isr-library/vec_vid17.S index c1ca9f6..b17681f 100644 --- a/libgcc/config/nds32/isr-library/vec_vid17.S +++ b/libgcc/config/nds32/isr-library/vec_vid17.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.17, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_17 .type _nds32_vector_17, @function _nds32_vector_17: diff --git a/libgcc/config/nds32/isr-library/vec_vid18.S b/libgcc/config/nds32/isr-library/vec_vid18.S index ef4cbee..4166fa1 100644 --- a/libgcc/config/nds32/isr-library/vec_vid18.S +++ b/libgcc/config/nds32/isr-library/vec_vid18.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.18, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_18 .type _nds32_vector_18, @function _nds32_vector_18: diff --git a/libgcc/config/nds32/isr-library/vec_vid19.S b/libgcc/config/nds32/isr-library/vec_vid19.S index 5efab98..0d7d1de 100644 --- a/libgcc/config/nds32/isr-library/vec_vid19.S +++ b/libgcc/config/nds32/isr-library/vec_vid19.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.19, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_19 .type _nds32_vector_19, @function _nds32_vector_19: diff --git a/libgcc/config/nds32/isr-library/vec_vid20.S b/libgcc/config/nds32/isr-library/vec_vid20.S index 95e1247..d39d74b 100644 --- a/libgcc/config/nds32/isr-library/vec_vid20.S +++ b/libgcc/config/nds32/isr-library/vec_vid20.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.20, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_20 .type _nds32_vector_20, @function _nds32_vector_20: diff --git a/libgcc/config/nds32/isr-library/vec_vid21.S b/libgcc/config/nds32/isr-library/vec_vid21.S index f3f401e..deff0cf 100644 --- a/libgcc/config/nds32/isr-library/vec_vid21.S +++ b/libgcc/config/nds32/isr-library/vec_vid21.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.21, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_21 .type _nds32_vector_21, @function _nds32_vector_21: diff --git a/libgcc/config/nds32/isr-library/vec_vid22.S b/libgcc/config/nds32/isr-library/vec_vid22.S index 28d0d99..ebd3891 100644 --- a/libgcc/config/nds32/isr-library/vec_vid22.S +++ b/libgcc/config/nds32/isr-library/vec_vid22.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.22, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_22 .type _nds32_vector_22, @function _nds32_vector_22: diff --git a/libgcc/config/nds32/isr-library/vec_vid23.S b/libgcc/config/nds32/isr-library/vec_vid23.S index a824629..90562e7 100644 --- a/libgcc/config/nds32/isr-library/vec_vid23.S +++ b/libgcc/config/nds32/isr-library/vec_vid23.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.23, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_23 .type _nds32_vector_23, @function _nds32_vector_23: diff --git a/libgcc/config/nds32/isr-library/vec_vid24.S b/libgcc/config/nds32/isr-library/vec_vid24.S index 2c0e2d8..7bd344c 100644 --- a/libgcc/config/nds32/isr-library/vec_vid24.S +++ b/libgcc/config/nds32/isr-library/vec_vid24.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.24, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_24 .type _nds32_vector_24, @function _nds32_vector_24: diff --git a/libgcc/config/nds32/isr-library/vec_vid25.S b/libgcc/config/nds32/isr-library/vec_vid25.S index 56f7886..245db6e 100644 --- a/libgcc/config/nds32/isr-library/vec_vid25.S +++ b/libgcc/config/nds32/isr-library/vec_vid25.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.25, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_25 .type _nds32_vector_25, @function _nds32_vector_25: diff --git a/libgcc/config/nds32/isr-library/vec_vid26.S b/libgcc/config/nds32/isr-library/vec_vid26.S index b02163e..4df61ff 100644 --- a/libgcc/config/nds32/isr-library/vec_vid26.S +++ b/libgcc/config/nds32/isr-library/vec_vid26.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.26, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_26 .type _nds32_vector_26, @function _nds32_vector_26: diff --git a/libgcc/config/nds32/isr-library/vec_vid27.S b/libgcc/config/nds32/isr-library/vec_vid27.S index 276d1f0..50960db 100644 --- a/libgcc/config/nds32/isr-library/vec_vid27.S +++ b/libgcc/config/nds32/isr-library/vec_vid27.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.27, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_27 .type _nds32_vector_27, @function _nds32_vector_27: diff --git a/libgcc/config/nds32/isr-library/vec_vid28.S b/libgcc/config/nds32/isr-library/vec_vid28.S index 59e8cc2..e44adbb 100644 --- a/libgcc/config/nds32/isr-library/vec_vid28.S +++ b/libgcc/config/nds32/isr-library/vec_vid28.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.28, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_28 .type _nds32_vector_28, @function _nds32_vector_28: diff --git a/libgcc/config/nds32/isr-library/vec_vid29.S b/libgcc/config/nds32/isr-library/vec_vid29.S index 7119e25..f7e6c77 100644 --- a/libgcc/config/nds32/isr-library/vec_vid29.S +++ b/libgcc/config/nds32/isr-library/vec_vid29.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.29, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_29 .type _nds32_vector_29, @function _nds32_vector_29: diff --git a/libgcc/config/nds32/isr-library/vec_vid30.S b/libgcc/config/nds32/isr-library/vec_vid30.S index 7c7bd5f..7fac25d 100644 --- a/libgcc/config/nds32/isr-library/vec_vid30.S +++ b/libgcc/config/nds32/isr-library/vec_vid30.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.30, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_30 .type _nds32_vector_30, @function _nds32_vector_30: diff --git a/libgcc/config/nds32/isr-library/vec_vid31.S b/libgcc/config/nds32/isr-library/vec_vid31.S index bd29e03..5857765 100644 --- a/libgcc/config/nds32/isr-library/vec_vid31.S +++ b/libgcc/config/nds32/isr-library/vec_vid31.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.31, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_31 .type _nds32_vector_31, @function _nds32_vector_31: diff --git a/libgcc/config/nds32/isr-library/vec_vid32.S b/libgcc/config/nds32/isr-library/vec_vid32.S index 57b8db0..bcd5dbf 100644 --- a/libgcc/config/nds32/isr-library/vec_vid32.S +++ b/libgcc/config/nds32/isr-library/vec_vid32.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.32, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_32 .type _nds32_vector_32, @function _nds32_vector_32: diff --git a/libgcc/config/nds32/isr-library/vec_vid33.S b/libgcc/config/nds32/isr-library/vec_vid33.S index 609735e..abfed4e 100644 --- a/libgcc/config/nds32/isr-library/vec_vid33.S +++ b/libgcc/config/nds32/isr-library/vec_vid33.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.33, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_33 .type _nds32_vector_33, @function _nds32_vector_33: diff --git a/libgcc/config/nds32/isr-library/vec_vid34.S b/libgcc/config/nds32/isr-library/vec_vid34.S index 2a91328..f9446bb 100644 --- a/libgcc/config/nds32/isr-library/vec_vid34.S +++ b/libgcc/config/nds32/isr-library/vec_vid34.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.34, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_34 .type _nds32_vector_34, @function _nds32_vector_34: diff --git a/libgcc/config/nds32/isr-library/vec_vid35.S b/libgcc/config/nds32/isr-library/vec_vid35.S index 65dd081..8862137 100644 --- a/libgcc/config/nds32/isr-library/vec_vid35.S +++ b/libgcc/config/nds32/isr-library/vec_vid35.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.35, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_35 .type _nds32_vector_35, @function _nds32_vector_35: diff --git a/libgcc/config/nds32/isr-library/vec_vid36.S b/libgcc/config/nds32/isr-library/vec_vid36.S index fa47b8e..dbcbbf4 100644 --- a/libgcc/config/nds32/isr-library/vec_vid36.S +++ b/libgcc/config/nds32/isr-library/vec_vid36.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.36, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_36 .type _nds32_vector_36, @function _nds32_vector_36: diff --git a/libgcc/config/nds32/isr-library/vec_vid37.S b/libgcc/config/nds32/isr-library/vec_vid37.S index ece8456..392f18b 100644 --- a/libgcc/config/nds32/isr-library/vec_vid37.S +++ b/libgcc/config/nds32/isr-library/vec_vid37.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.37, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_37 .type _nds32_vector_37, @function _nds32_vector_37: diff --git a/libgcc/config/nds32/isr-library/vec_vid38.S b/libgcc/config/nds32/isr-library/vec_vid38.S index c4a12f5..efe6619 100644 --- a/libgcc/config/nds32/isr-library/vec_vid38.S +++ b/libgcc/config/nds32/isr-library/vec_vid38.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.38, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_38 .type _nds32_vector_38, @function _nds32_vector_38: diff --git a/libgcc/config/nds32/isr-library/vec_vid39.S b/libgcc/config/nds32/isr-library/vec_vid39.S index b3e56ed..238c43a 100644 --- a/libgcc/config/nds32/isr-library/vec_vid39.S +++ b/libgcc/config/nds32/isr-library/vec_vid39.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.39, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_39 .type _nds32_vector_39, @function _nds32_vector_39: diff --git a/libgcc/config/nds32/isr-library/vec_vid40.S b/libgcc/config/nds32/isr-library/vec_vid40.S index 01364aa..cf3eaa2 100644 --- a/libgcc/config/nds32/isr-library/vec_vid40.S +++ b/libgcc/config/nds32/isr-library/vec_vid40.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.40, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_40 .type _nds32_vector_40, @function _nds32_vector_40: diff --git a/libgcc/config/nds32/isr-library/vec_vid41.S b/libgcc/config/nds32/isr-library/vec_vid41.S index f20beec..27b7aac 100644 --- a/libgcc/config/nds32/isr-library/vec_vid41.S +++ b/libgcc/config/nds32/isr-library/vec_vid41.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.41, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_41 .type _nds32_vector_41, @function _nds32_vector_41: diff --git a/libgcc/config/nds32/isr-library/vec_vid42.S b/libgcc/config/nds32/isr-library/vec_vid42.S index 6c29f1f..bfeed46 100644 --- a/libgcc/config/nds32/isr-library/vec_vid42.S +++ b/libgcc/config/nds32/isr-library/vec_vid42.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.42, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_42 .type _nds32_vector_42, @function _nds32_vector_42: diff --git a/libgcc/config/nds32/isr-library/vec_vid43.S b/libgcc/config/nds32/isr-library/vec_vid43.S index 8767f99..54640c9 100644 --- a/libgcc/config/nds32/isr-library/vec_vid43.S +++ b/libgcc/config/nds32/isr-library/vec_vid43.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.43, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_43 .type _nds32_vector_43, @function _nds32_vector_43: diff --git a/libgcc/config/nds32/isr-library/vec_vid44.S b/libgcc/config/nds32/isr-library/vec_vid44.S index 8b6f53d..f617243 100644 --- a/libgcc/config/nds32/isr-library/vec_vid44.S +++ b/libgcc/config/nds32/isr-library/vec_vid44.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.44, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_44 .type _nds32_vector_44, @function _nds32_vector_44: diff --git a/libgcc/config/nds32/isr-library/vec_vid45.S b/libgcc/config/nds32/isr-library/vec_vid45.S index 52e344b..2cfeb78 100644 --- a/libgcc/config/nds32/isr-library/vec_vid45.S +++ b/libgcc/config/nds32/isr-library/vec_vid45.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.45, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_45 .type _nds32_vector_45, @function _nds32_vector_45: diff --git a/libgcc/config/nds32/isr-library/vec_vid46.S b/libgcc/config/nds32/isr-library/vec_vid46.S index f9dc0d1..45c8847 100644 --- a/libgcc/config/nds32/isr-library/vec_vid46.S +++ b/libgcc/config/nds32/isr-library/vec_vid46.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.46, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_46 .type _nds32_vector_46, @function _nds32_vector_46: diff --git a/libgcc/config/nds32/isr-library/vec_vid47.S b/libgcc/config/nds32/isr-library/vec_vid47.S index 436e7e3..25469e4 100644 --- a/libgcc/config/nds32/isr-library/vec_vid47.S +++ b/libgcc/config/nds32/isr-library/vec_vid47.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.47, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_47 .type _nds32_vector_47, @function _nds32_vector_47: diff --git a/libgcc/config/nds32/isr-library/vec_vid48.S b/libgcc/config/nds32/isr-library/vec_vid48.S index 219dfd4..5a00119 100644 --- a/libgcc/config/nds32/isr-library/vec_vid48.S +++ b/libgcc/config/nds32/isr-library/vec_vid48.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.48, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_48 .type _nds32_vector_48, @function _nds32_vector_48: diff --git a/libgcc/config/nds32/isr-library/vec_vid49.S b/libgcc/config/nds32/isr-library/vec_vid49.S index e3ba753..dfe11f1 100644 --- a/libgcc/config/nds32/isr-library/vec_vid49.S +++ b/libgcc/config/nds32/isr-library/vec_vid49.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.49, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_49 .type _nds32_vector_49, @function _nds32_vector_49: diff --git a/libgcc/config/nds32/isr-library/vec_vid50.S b/libgcc/config/nds32/isr-library/vec_vid50.S index b0b3fc2..0dacd26 100644 --- a/libgcc/config/nds32/isr-library/vec_vid50.S +++ b/libgcc/config/nds32/isr-library/vec_vid50.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.50, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_50 .type _nds32_vector_50, @function _nds32_vector_50: diff --git a/libgcc/config/nds32/isr-library/vec_vid51.S b/libgcc/config/nds32/isr-library/vec_vid51.S index bf3011d..5ab28ef 100644 --- a/libgcc/config/nds32/isr-library/vec_vid51.S +++ b/libgcc/config/nds32/isr-library/vec_vid51.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.51, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_51 .type _nds32_vector_51, @function _nds32_vector_51: diff --git a/libgcc/config/nds32/isr-library/vec_vid52.S b/libgcc/config/nds32/isr-library/vec_vid52.S index eaf5f14..ed00f40 100644 --- a/libgcc/config/nds32/isr-library/vec_vid52.S +++ b/libgcc/config/nds32/isr-library/vec_vid52.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.52, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_52 .type _nds32_vector_52, @function _nds32_vector_52: diff --git a/libgcc/config/nds32/isr-library/vec_vid53.S b/libgcc/config/nds32/isr-library/vec_vid53.S index 3f92e56..564cadb 100644 --- a/libgcc/config/nds32/isr-library/vec_vid53.S +++ b/libgcc/config/nds32/isr-library/vec_vid53.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.53, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_53 .type _nds32_vector_53, @function _nds32_vector_53: diff --git a/libgcc/config/nds32/isr-library/vec_vid54.S b/libgcc/config/nds32/isr-library/vec_vid54.S index f22793f..377c524 100644 --- a/libgcc/config/nds32/isr-library/vec_vid54.S +++ b/libgcc/config/nds32/isr-library/vec_vid54.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.54, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_54 .type _nds32_vector_54, @function _nds32_vector_54: diff --git a/libgcc/config/nds32/isr-library/vec_vid55.S b/libgcc/config/nds32/isr-library/vec_vid55.S index 1017130..497252a 100644 --- a/libgcc/config/nds32/isr-library/vec_vid55.S +++ b/libgcc/config/nds32/isr-library/vec_vid55.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.55, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_55 .type _nds32_vector_55, @function _nds32_vector_55: diff --git a/libgcc/config/nds32/isr-library/vec_vid56.S b/libgcc/config/nds32/isr-library/vec_vid56.S index a0923e9..b62534b 100644 --- a/libgcc/config/nds32/isr-library/vec_vid56.S +++ b/libgcc/config/nds32/isr-library/vec_vid56.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.56, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_56 .type _nds32_vector_56, @function _nds32_vector_56: diff --git a/libgcc/config/nds32/isr-library/vec_vid57.S b/libgcc/config/nds32/isr-library/vec_vid57.S index e711b89..b1bb42d 100644 --- a/libgcc/config/nds32/isr-library/vec_vid57.S +++ b/libgcc/config/nds32/isr-library/vec_vid57.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.57, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_57 .type _nds32_vector_57, @function _nds32_vector_57: diff --git a/libgcc/config/nds32/isr-library/vec_vid58.S b/libgcc/config/nds32/isr-library/vec_vid58.S index f8d9064..14595a5 100644 --- a/libgcc/config/nds32/isr-library/vec_vid58.S +++ b/libgcc/config/nds32/isr-library/vec_vid58.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.58, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_58 .type _nds32_vector_58, @function _nds32_vector_58: diff --git a/libgcc/config/nds32/isr-library/vec_vid59.S b/libgcc/config/nds32/isr-library/vec_vid59.S index 58fb6e6..e5be177 100644 --- a/libgcc/config/nds32/isr-library/vec_vid59.S +++ b/libgcc/config/nds32/isr-library/vec_vid59.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.59, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_59 .type _nds32_vector_59, @function _nds32_vector_59: diff --git a/libgcc/config/nds32/isr-library/vec_vid60.S b/libgcc/config/nds32/isr-library/vec_vid60.S index 94aa6e0..f6df971 100644 --- a/libgcc/config/nds32/isr-library/vec_vid60.S +++ b/libgcc/config/nds32/isr-library/vec_vid60.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.60, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_60 .type _nds32_vector_60, @function _nds32_vector_60: diff --git a/libgcc/config/nds32/isr-library/vec_vid61.S b/libgcc/config/nds32/isr-library/vec_vid61.S index 869f6c8..4f97b04 100644 --- a/libgcc/config/nds32/isr-library/vec_vid61.S +++ b/libgcc/config/nds32/isr-library/vec_vid61.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.61, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_61 .type _nds32_vector_61, @function _nds32_vector_61: diff --git a/libgcc/config/nds32/isr-library/vec_vid62.S b/libgcc/config/nds32/isr-library/vec_vid62.S index acc846c..08d1bbb 100644 --- a/libgcc/config/nds32/isr-library/vec_vid62.S +++ b/libgcc/config/nds32/isr-library/vec_vid62.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.62, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_62 .type _nds32_vector_62, @function _nds32_vector_62: diff --git a/libgcc/config/nds32/isr-library/vec_vid63.S b/libgcc/config/nds32/isr-library/vec_vid63.S index d0727ec..2b2068c 100644 --- a/libgcc/config/nds32/isr-library/vec_vid63.S +++ b/libgcc/config/nds32/isr-library/vec_vid63.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.63, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_63 .type _nds32_vector_63, @function _nds32_vector_63: diff --git a/libgcc/config/nds32/isr-library/vec_vid64.S b/libgcc/config/nds32/isr-library/vec_vid64.S index cb1659a..2c06ea0 100644 --- a/libgcc/config/nds32/isr-library/vec_vid64.S +++ b/libgcc/config/nds32/isr-library/vec_vid64.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.64, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_64 .type _nds32_vector_64, @function _nds32_vector_64: diff --git a/libgcc/config/nds32/isr-library/vec_vid65.S b/libgcc/config/nds32/isr-library/vec_vid65.S index da46481..d2359fd 100644 --- a/libgcc/config/nds32/isr-library/vec_vid65.S +++ b/libgcc/config/nds32/isr-library/vec_vid65.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.65, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_65 .type _nds32_vector_65, @function _nds32_vector_65: diff --git a/libgcc/config/nds32/isr-library/vec_vid66.S b/libgcc/config/nds32/isr-library/vec_vid66.S index a8c18b8..69ccf36 100644 --- a/libgcc/config/nds32/isr-library/vec_vid66.S +++ b/libgcc/config/nds32/isr-library/vec_vid66.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.66, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_66 .type _nds32_vector_66, @function _nds32_vector_66: diff --git a/libgcc/config/nds32/isr-library/vec_vid67.S b/libgcc/config/nds32/isr-library/vec_vid67.S index d2996a3..78a68cb 100644 --- a/libgcc/config/nds32/isr-library/vec_vid67.S +++ b/libgcc/config/nds32/isr-library/vec_vid67.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.67, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_67 .type _nds32_vector_67, @function _nds32_vector_67: diff --git a/libgcc/config/nds32/isr-library/vec_vid68.S b/libgcc/config/nds32/isr-library/vec_vid68.S index 0c9de86..a120ec3 100644 --- a/libgcc/config/nds32/isr-library/vec_vid68.S +++ b/libgcc/config/nds32/isr-library/vec_vid68.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.68, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_68 .type _nds32_vector_68, @function _nds32_vector_68: diff --git a/libgcc/config/nds32/isr-library/vec_vid69.S b/libgcc/config/nds32/isr-library/vec_vid69.S index 43cf748..e2bdd5f 100644 --- a/libgcc/config/nds32/isr-library/vec_vid69.S +++ b/libgcc/config/nds32/isr-library/vec_vid69.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.69, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_69 .type _nds32_vector_69, @function _nds32_vector_69: diff --git a/libgcc/config/nds32/isr-library/vec_vid70.S b/libgcc/config/nds32/isr-library/vec_vid70.S index aba3e6a..a5ac1f3 100644 --- a/libgcc/config/nds32/isr-library/vec_vid70.S +++ b/libgcc/config/nds32/isr-library/vec_vid70.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.70, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_70 .type _nds32_vector_70, @function _nds32_vector_70: diff --git a/libgcc/config/nds32/isr-library/vec_vid71.S b/libgcc/config/nds32/isr-library/vec_vid71.S index be8aaa5..06ed89c 100644 --- a/libgcc/config/nds32/isr-library/vec_vid71.S +++ b/libgcc/config/nds32/isr-library/vec_vid71.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.71, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. 
*/ .vec_size 16 .align 4 +#endif .weak _nds32_vector_71 .type _nds32_vector_71, @function _nds32_vector_71: diff --git a/libgcc/config/nds32/isr-library/vec_vid72.S b/libgcc/config/nds32/isr-library/vec_vid72.S index 041c895..2163201b 100644 --- a/libgcc/config/nds32/isr-library/vec_vid72.S +++ b/libgcc/config/nds32/isr-library/vec_vid72.S @@ -24,8 +24,15 @@ . */ .section .nds32_vector.72, "ax" +#if __NDS32_ISR_VECTOR_SIZE_4__ + /* The vector size is default 4-byte for v3 architecture. */ + .vec_size 4 + .align 2 +#else + /* The vector size is default 16-byte for other architectures. */ .vec_size 16 .align 4 +#endif .weak _nds32_vector_72 .type _nds32_vector_72, @function _nds32_vector_72: diff --git a/libgcc/config/nds32/linux-atomic.c b/libgcc/config/nds32/linux-atomic.c new file mode 100644 index 0000000..6da7be9 --- /dev/null +++ b/libgcc/config/nds32/linux-atomic.c @@ -0,0 +1,282 @@ +/* Linux-specific atomic operations for NDS32 Linux. + Copyright (C) 2012-2018 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* We implement byte, short and int versions of each atomic operation + using the kernel helper defined below. 
There is no support for + 64-bit operations yet. */ + +/* This function is copied from the NDS32 Linux kernel. */ +static inline int +__kernel_cmpxchg (int oldval, int newval, int *mem) +{ + int temp1, temp2, temp3, offset; + + asm volatile ("msync\tall\n" + "movi\t%0, #0\n" + "1:\n" + "\tllw\t%1, [%4+%0]\n" + "\tsub\t%3, %1, %6\n" + "\tcmovz\t%2, %5, %3\n" + "\tcmovn\t%2, %1, %3\n" + "\tscw\t%2, [%4+%0]\n" + "\tbeqz\t%2, 1b\n" + : "=&r" (offset), "=&r" (temp3), "=&r" (temp2), "=&r" (temp1) + : "r" (mem), "r" (newval), "r" (oldval) : "memory"); + + return temp1; +} + +#define HIDDEN __attribute__ ((visibility ("hidden"))) + +#ifdef __NDS32_EL__ +#define INVERT_MASK_1 0 +#define INVERT_MASK_2 0 +#else +#define INVERT_MASK_1 24 +#define INVERT_MASK_2 16 +#endif + +#define MASK_1 0xffu +#define MASK_2 0xffffu + +#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_fetch_and_##OP##_4 (int *ptr, int val) \ + { \ + int failure, tmp; \ + \ + do { \ + tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return tmp; \ + } + +FETCH_AND_OP_WORD (add, , +) +FETCH_AND_OP_WORD (sub, , -) +FETCH_AND_OP_WORD (or, , |) +FETCH_AND_OP_WORD (and, , &) +FETCH_AND_OP_WORD (xor, , ^) +FETCH_AND_OP_WORD (nand, ~, &) + +#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH +#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH + +/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for + subword-sized quantities. 
*/ + +#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ + TYPE HIDDEN \ + NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ + { \ + int *wordptr = (int *) ((unsigned long) ptr & ~3); \ + unsigned int mask, shift, oldval, newval; \ + int failure; \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ + newval = ((PFX_OP (((oldval & mask) >> shift) \ + INF_OP (unsigned int) val)) << shift) & mask; \ + newval |= oldval & ~mask; \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (RETURN & mask) >> shift; \ + } + + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) + +#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_##OP##_and_fetch_4 (int *ptr, int val) \ + { \ + int tmp, failure; \ + \ + do { \ + tmp = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return PFX_OP (tmp INF_OP val); \ + } + +OP_AND_FETCH_WORD (add, , +) +OP_AND_FETCH_WORD (sub, , -) +OP_AND_FETCH_WORD (or, , |) +OP_AND_FETCH_WORD (and, , &) +OP_AND_FETCH_WORD (xor, , ^) +OP_AND_FETCH_WORD (nand, ~, &) + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 
2, newval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) + +int HIDDEN +__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int actual_oldval, fail; + + while (1) + { + actual_oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); + + if (oldval != actual_oldval) + return actual_oldval; + + fail = __kernel_cmpxchg (actual_oldval, newval, ptr); + + if (!fail) + return oldval; + } +} + +#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + int *wordptr = (int *)((unsigned long) ptr & ~3), fail; \ + unsigned int mask, shift, actual_oldval, actual_newval; \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + while (1) \ + { \ + actual_oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ + \ + if (((actual_oldval & mask) >> shift) != (unsigned int) oldval) \ + return (actual_oldval & mask) >> shift; \ + \ + actual_newval = (actual_oldval & ~mask) \ + | (((unsigned int) newval << shift) & mask); \ + \ + fail = __kernel_cmpxchg (actual_oldval, actual_newval, \ + wordptr); \ + \ + if (!fail) \ + return oldval; \ + } \ + } + +SUBWORD_VAL_CAS (unsigned short, 2) +SUBWORD_VAL_CAS (unsigned char, 1) + +typedef unsigned char bool; + +bool HIDDEN +__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int failure = __kernel_cmpxchg (oldval, newval, ptr); + return (failure == 0); +} + +#define 
SUBWORD_BOOL_CAS(TYPE, WIDTH) \ + bool HIDDEN \ + __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + TYPE actual_oldval \ + = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ + return (oldval == actual_oldval); \ + } + +SUBWORD_BOOL_CAS (unsigned short, 2) +SUBWORD_BOOL_CAS (unsigned char, 1) + +int HIDDEN +__sync_lock_test_and_set_4 (int *ptr, int val) +{ + int failure, oldval; + + do { + oldval = __atomic_load_n (ptr, __ATOMIC_SEQ_CST); + failure = __kernel_cmpxchg (oldval, val, ptr); + } while (failure != 0); + + return oldval; +} + +#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ + { \ + int failure; \ + unsigned int oldval, newval, shift, mask; \ + int *wordptr = (int *) ((unsigned long) ptr & ~3); \ + \ + shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = __atomic_load_n (wordptr, __ATOMIC_SEQ_CST); \ + newval = (oldval & ~mask) \ + | (((unsigned int) val << shift) & mask); \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (oldval & mask) >> shift; \ + } + +SUBWORD_TEST_AND_SET (unsigned short, 2) +SUBWORD_TEST_AND_SET (unsigned char, 1) + +#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ + void HIDDEN \ + __sync_lock_release_##WIDTH (TYPE *ptr) \ + { \ + /* All writes before this point must be seen before we release \ + the lock itself. */ \ + __builtin_nds32_msync_all (); \ + *ptr = 0; \ + } + +SYNC_LOCK_RELEASE (int, 4) +SYNC_LOCK_RELEASE (short, 2) +SYNC_LOCK_RELEASE (char, 1) diff --git a/libgcc/config/nds32/linux-unwind.h b/libgcc/config/nds32/linux-unwind.h new file mode 100644 index 0000000..00f2b2c --- /dev/null +++ b/libgcc/config/nds32/linux-unwind.h @@ -0,0 +1,143 @@ +/* DWARF2 EH unwinding support for NDS32 Linux signal frame. + Copyright (C) 2014-2015 Free Software Foundation, Inc. 
+ Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef inhibit_libc + +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. + The corresponding bits in the Linux kernel are in + arch/nds32/kernel/signal.c. */ + +#include <signal.h> +#include <asm/unistd.h> +#include <sys/ucontext.h> + +/* Exactly the same layout as the kernel structures, unique names. */ + +/* arch/nds32/kernel/signal.c */ +struct _rt_sigframe { + siginfo_t info; + struct ucontext_t uc; +}; + +#define RT_SIGRETURN 0x8b00f044 + +#define MD_FALLBACK_FRAME_STATE_FOR nds32_fallback_frame_state + +/* This function is supposed to be invoked by uw_frame_state_for() + when there is no unwind data available. + + Generally, given the _Unwind_Context CONTEXT for a stack frame, + we need to look up its caller and decode information into FS. + However, if the exception handling happens within a signal handler, + the return address of signal handler is a special module, which + contains signal return syscall and has no FDE in the .eh_frame section. 
+ We need to implement MD_FALLBACK_FRAME_STATE_FOR so that we can + unwind through signal frames. */ +static _Unwind_Reason_Code +nds32_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + u_int32_t *pc = (u_int32_t *) context->ra; + struct sigcontext *sc_; + _Unwind_Ptr new_cfa; + +#ifdef __NDS32_EB__ +#error "Signal handler is not supported for force unwind." +#endif + + if ((_Unwind_Ptr) pc & 3) + return _URC_END_OF_STACK; + + /* Check if we are going through a signal handler. + See arch/nds32/kernel/signal.c implementation. + FIXME: Currently we only handle little endian (EL) case. */ + if (pc[0] == RT_SIGRETURN) + { + /* Use the '_rt_sigframe' memory address to locate the kernel's sigcontext. + The sigcontext structure is in arch/nds32/include/asm/sigcontext.h. */ + struct _rt_sigframe *rt_; + rt_ = context->cfa; + sc_ = &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + /* Update cfa from sigcontext. */ + new_cfa = (_Unwind_Ptr) sc_; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = STACK_POINTER_REGNUM; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa; + +#define NDS32_PUT_FS_REG(NUM, NAME) \ + (fs->regs.reg[NUM].how = REG_SAVED_OFFSET, \ + fs->regs.reg[NUM].loc.offset = (_Unwind_Ptr) &(sc_->NAME) - new_cfa) + + /* Restore all register values. 
*/ + NDS32_PUT_FS_REG (0, nds32_r0); + NDS32_PUT_FS_REG (1, nds32_r1); + NDS32_PUT_FS_REG (2, nds32_r2); + NDS32_PUT_FS_REG (3, nds32_r3); + NDS32_PUT_FS_REG (4, nds32_r4); + NDS32_PUT_FS_REG (5, nds32_r5); + NDS32_PUT_FS_REG (6, nds32_r6); + NDS32_PUT_FS_REG (7, nds32_r7); + NDS32_PUT_FS_REG (8, nds32_r8); + NDS32_PUT_FS_REG (9, nds32_r9); + NDS32_PUT_FS_REG (10, nds32_r10); + NDS32_PUT_FS_REG (11, nds32_r11); + NDS32_PUT_FS_REG (12, nds32_r12); + NDS32_PUT_FS_REG (13, nds32_r13); + NDS32_PUT_FS_REG (14, nds32_r14); + NDS32_PUT_FS_REG (15, nds32_r15); + NDS32_PUT_FS_REG (16, nds32_r16); + NDS32_PUT_FS_REG (17, nds32_r17); + NDS32_PUT_FS_REG (18, nds32_r18); + NDS32_PUT_FS_REG (19, nds32_r19); + NDS32_PUT_FS_REG (20, nds32_r20); + NDS32_PUT_FS_REG (21, nds32_r21); + NDS32_PUT_FS_REG (22, nds32_r22); + NDS32_PUT_FS_REG (23, nds32_r23); + NDS32_PUT_FS_REG (24, nds32_r24); + NDS32_PUT_FS_REG (25, nds32_r25); + + NDS32_PUT_FS_REG (28, nds32_fp); + NDS32_PUT_FS_REG (29, nds32_gp); + NDS32_PUT_FS_REG (30, nds32_lp); + NDS32_PUT_FS_REG (31, nds32_sp); + + /* Restore PC, point to trigger signal instruction. */ + NDS32_PUT_FS_REG (32, nds32_ipc); + +#undef NDS32_PUT_FS_REG + + /* The retaddr is PC, use PC to find FDE. 
*/ + fs->retaddr_column = 32; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#endif diff --git a/libgcc/config/nds32/sfp-machine.h b/libgcc/config/nds32/sfp-machine.h index 499bdad..bfbdaf9 100644 --- a/libgcc/config/nds32/sfp-machine.h +++ b/libgcc/config/nds32/sfp-machine.h @@ -76,6 +76,25 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); R##_c = FP_CLS_NAN; \ } while (0) +#ifdef NDS32_ABI_2FP_PLUS +#define FP_RND_NEAREST 0x0 +#define FP_RND_PINF 0x1 +#define FP_RND_MINF 0x2 +#define FP_RND_ZERO 0x3 +#define FP_RND_MASK 0x3 + +#define _FP_DECL_EX \ + unsigned long int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST + +#define FP_INIT_ROUNDMODE \ + do { \ + _fcsr = __builtin_nds32_fmfcsr (); \ + } while (0) + +#define FP_ROUNDMODE (_fcsr & FP_RND_MASK) + +#endif + /* Not checked. */ #define _FP_TININESS_AFTER_ROUNDING 0 diff --git a/libgcc/config/nds32/t-nds32-glibc b/libgcc/config/nds32/t-nds32-glibc new file mode 100644 index 0000000..4e22931 --- /dev/null +++ b/libgcc/config/nds32/t-nds32-glibc @@ -0,0 +1,34 @@ +# Rules of glibc library makefile of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2015 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +# Compiler flags to use when compiling 'libgcc2.c' +HOST_LIBGCC2_CFLAGS = -O2 -fPIC -fwrapv +LIB2ADD += $(srcdir)/config/nds32/linux-atomic.c + +#LIB1ASMSRC = nds32/lib1asmsrc-newlib.S +#LIB1ASMFUNCS = _divsi3 _modsi3 _udivsi3 _umodsi3 + +# List of functions not to build from libgcc2.c. +#LIB2FUNCS_EXCLUDE = _clzsi2 + +# List of extra C and assembler files(*.S) to add to static libgcc2. +#LIB2ADD_ST += $(srcdir)/config/nds32/lib2csrc-newlib/_clzsi2.c + +# ------------------------------------------------------------------------ diff --git a/libgcc/config/nds32/t-nds32-isr b/libgcc/config/nds32/t-nds32-isr index 4f86f90..abfd82b 100644 --- a/libgcc/config/nds32/t-nds32-isr +++ b/libgcc/config/nds32/t-nds32-isr @@ -23,11 +23,11 @@ # Makfile fragment rules for libnds32_isr.a to support ISR attribute extension ############################################################################### -# basic flags setting +# Basic flags setting. ISR_CFLAGS = $(CFLAGS) -c -# the object files we would like to create -LIBNDS32_ISR_16B_OBJS = \ +# The object files we would like to create. 
+LIBNDS32_ISR_VEC_OBJS = \ vec_vid00.o vec_vid01.o vec_vid02.o vec_vid03.o \ vec_vid04.o vec_vid05.o vec_vid06.o vec_vid07.o \ vec_vid08.o vec_vid09.o vec_vid10.o vec_vid11.o \ @@ -46,40 +46,9 @@ LIBNDS32_ISR_16B_OBJS = \ vec_vid60.o vec_vid61.o vec_vid62.o vec_vid63.o \ vec_vid64.o vec_vid65.o vec_vid66.o vec_vid67.o \ vec_vid68.o vec_vid69.o vec_vid70.o vec_vid71.o \ - vec_vid72.o \ - excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ - excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ - intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ - intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ - reset.o - -LIBNDS32_ISR_4B_OBJS = \ - vec_vid00_4b.o vec_vid01_4b.o vec_vid02_4b.o vec_vid03_4b.o \ - vec_vid04_4b.o vec_vid05_4b.o vec_vid06_4b.o vec_vid07_4b.o \ - vec_vid08_4b.o vec_vid09_4b.o vec_vid10_4b.o vec_vid11_4b.o \ - vec_vid12_4b.o vec_vid13_4b.o vec_vid14_4b.o vec_vid15_4b.o \ - vec_vid16_4b.o vec_vid17_4b.o vec_vid18_4b.o vec_vid19_4b.o \ - vec_vid20_4b.o vec_vid21_4b.o vec_vid22_4b.o vec_vid23_4b.o \ - vec_vid24_4b.o vec_vid25_4b.o vec_vid26_4b.o vec_vid27_4b.o \ - vec_vid28_4b.o vec_vid29_4b.o vec_vid30_4b.o vec_vid31_4b.o \ - vec_vid32_4b.o vec_vid33_4b.o vec_vid34_4b.o vec_vid35_4b.o \ - vec_vid36_4b.o vec_vid37_4b.o vec_vid38_4b.o vec_vid39_4b.o \ - vec_vid40_4b.o vec_vid41_4b.o vec_vid42_4b.o vec_vid43_4b.o \ - vec_vid44_4b.o vec_vid45_4b.o vec_vid46_4b.o vec_vid47_4b.o \ - vec_vid48_4b.o vec_vid49_4b.o vec_vid50_4b.o vec_vid51_4b.o \ - vec_vid52_4b.o vec_vid53_4b.o vec_vid54_4b.o vec_vid55_4b.o \ - vec_vid56_4b.o vec_vid57_4b.o vec_vid58_4b.o vec_vid59_4b.o \ - vec_vid60_4b.o vec_vid61_4b.o vec_vid62_4b.o vec_vid63_4b.o \ - vec_vid64_4b.o vec_vid65_4b.o vec_vid66_4b.o vec_vid67_4b.o \ - vec_vid68_4b.o vec_vid69_4b.o vec_vid70_4b.o vec_vid71_4b.o \ - vec_vid72_4b.o \ - excp_isr_ps_nn_4b.o excp_isr_ps_ns_4b.o excp_isr_ps_nr_4b.o \ - excp_isr_sa_nn_4b.o excp_isr_sa_ns_4b.o excp_isr_sa_nr_4b.o \ - intr_isr_ps_nn_4b.o intr_isr_ps_ns_4b.o 
intr_isr_ps_nr_4b.o \ - intr_isr_sa_nn_4b.o intr_isr_sa_ns_4b.o intr_isr_sa_nr_4b.o \ - reset_4b.o + vec_vid72.o -LIBNDS32_ISR_COMMON_OBJS = \ +LIBNDS32_ISR_JMP_OBJS = \ jmptbl_vid00.o jmptbl_vid01.o jmptbl_vid02.o jmptbl_vid03.o \ jmptbl_vid04.o jmptbl_vid05.o jmptbl_vid06.o jmptbl_vid07.o \ jmptbl_vid08.o jmptbl_vid09.o jmptbl_vid10.o jmptbl_vid11.o \ @@ -98,29 +67,32 @@ LIBNDS32_ISR_COMMON_OBJS = \ jmptbl_vid60.o jmptbl_vid61.o jmptbl_vid62.o jmptbl_vid63.o \ jmptbl_vid64.o jmptbl_vid65.o jmptbl_vid66.o jmptbl_vid67.o \ jmptbl_vid68.o jmptbl_vid69.o jmptbl_vid70.o jmptbl_vid71.o \ - jmptbl_vid72.o \ + jmptbl_vid72.o + +LIBNDS32_ISR_COMMON_OBJS = \ + excp_isr_ps_nn.o excp_isr_ps_ns.o excp_isr_ps_nr.o \ + excp_isr_sa_nn.o excp_isr_sa_ns.o excp_isr_sa_nr.o \ + intr_isr_ps_nn.o intr_isr_ps_ns.o intr_isr_ps_nr.o \ + intr_isr_sa_nn.o intr_isr_sa_ns.o intr_isr_sa_nr.o \ + reset.o \ nmih.o \ wrh.o -LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_16B_OBJS) $(LIBNDS32_ISR_4B_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) +LIBNDS32_ISR_COMPLETE_OBJS = $(LIBNDS32_ISR_VEC_OBJS) $(LIBNDS32_ISR_JMP_OBJS) $(LIBNDS32_ISR_COMMON_OBJS) -# Build common objects for ISR library -nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o -wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o - -jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S +# Build vector vid objects for ISR library. +vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ - -# Build 16b version objects for ISR library. (no "_4b" postfix string) -vec_vid%.o: $(srcdir)/config/nds32/isr-library/vec_vid%.S +# Build jump table objects for ISR library. +jmptbl_vid%.o: $(srcdir)/config/nds32/isr-library/jmptbl_vid%.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ + +# Build common objects for ISR library. 
excp_isr_ps_nn.o: $(srcdir)/config/nds32/isr-library/excp_isr.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr.S -o excp_isr_ps_nn.o @@ -160,48 +132,12 @@ intr_isr_sa_nr.o: $(srcdir)/config/nds32/isr-library/intr_isr.S reset.o: $(srcdir)/config/nds32/isr-library/reset.S $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset.S -o reset.o -# Build 4b version objects for ISR library. -vec_vid%_4b.o: $(srcdir)/config/nds32/isr-library/vec_vid%_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $< -o $@ - -excp_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nn_4b.o - -excp_isr_ps_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_ns_4b.o - -excp_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_ps_nr_4b.o - -excp_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nn_4b.o - -excp_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_ns_4b.o - -excp_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/excp_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/excp_isr_4b.S -o excp_isr_sa_nr_4b.o - -intr_isr_ps_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nn_4b.o - -intr_isr_ps_ns_4b.o: 
$(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_ns_4b.o - -intr_isr_ps_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_ps_nr_4b.o - -intr_isr_sa_nn_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nn_4b.o - -intr_isr_sa_ns_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_ns_4b.o +nmih.o: $(srcdir)/config/nds32/isr-library/nmih.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/nmih.S -o nmih.o -intr_isr_sa_nr_4b.o: $(srcdir)/config/nds32/isr-library/intr_isr_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) -DNDS32_SAVE_ALL_REGS -DNDS32_NESTED_READY $(srcdir)/config/nds32/isr-library/intr_isr_4b.S -o intr_isr_sa_nr_4b.o +wrh.o: $(srcdir)/config/nds32/isr-library/wrh.S + $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/wrh.S -o wrh.o -reset_4b.o: $(srcdir)/config/nds32/isr-library/reset_4b.S - $(GCC_FOR_TARGET) $(ISR_CFLAGS) $(srcdir)/config/nds32/isr-library/reset_4b.S -o reset_4b.o # The rule to create libnds32_isr.a file diff --git a/libgcc/config/nds32/t-nds32-newlib b/libgcc/config/nds32/t-nds32-newlib index 1ea2bc3..a59646f 100644 --- a/libgcc/config/nds32/t-nds32-newlib +++ b/libgcc/config/nds32/t-nds32-newlib @@ -19,7 +19,7 @@ # . # Compiler flags to use when compiling 'libgcc2.c' -HOST_LIBGCC2_CFLAGS = -O2 +HOST_LIBGCC2_CFLAGS = -O2 -fwrapv #LIB1ASMSRC = nds32/lib1asmsrc-newlib.S