diff --git a/config.sub b/config.sub
index 38f3d037a785f..3f2c56f1b0a99 100755
--- a/config.sub
+++ b/config.sub
@@ -4,7 +4,7 @@
 # shellcheck disable=SC2006,SC2268 # see below for rationale
 
-timestamp='2021-10-27'
+timestamp='2022-06-15'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
diff --git a/gcc/common/config/arc64/arc64-common.cc b/gcc/common/config/arc64/arc64-common.cc
new file mode 100644
index 0000000000000..c51630cc09ec6
--- /dev/null
+++ b/gcc/common/config/arc64/arc64-common.cc
@@ -0,0 +1,52 @@
+/* Common hooks for Synopsys DesignWare ARC
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "opts.h"
+#include "flags.h"
+
+/* Set default optimization options.  */
+#define OPT_LEVELS_3_PLUS_SPEED_ONLY OPT_LEVELS_3_PLUS
+static const struct default_options arc_option_optimization_table[] =
+  {
+    { OPT_LEVELS_SIZE, OPT_ftree_loop_optimize, NULL, 0},
+    { OPT_LEVELS_SIZE, OPT_fmove_loop_invariants, NULL, 0},
+    /* Disable fomit-frame-pointer by default.  */
+    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+    /* Enable redundant extension instructions removal at -O2 and higher.  */
+    { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+    { OPT_LEVELS_NONE, 0, NULL, 0 }
+  };
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE arc_option_optimization_table
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+  (MASK_BITSCAN | MASK_CODE_DENSITY | ARC64_SUBTARGET_DEFAULT)
+
+#include "common/common-target-def.h"
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 70d006b3f05ef..bd86169b643a2 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -344,13 +344,18 @@ amdgcn*)
 am33_2.0-*-linux*)
 	cpu_type=mn10300
 	;;
-arc*-*-*)
+arc-*-* | arceb-*-*)
 	cpu_type=arc
 	c_target_objs="arc-c.o"
 	cxx_target_objs="arc-c.o"
 	extra_options="${extra_options} arc/arc-tables.opt g.opt"
 	extra_headers="arc-simd.h"
 	;;
+arc[6432]*-*-*)
+	cpu_type=arc64
+	c_target_objs="arc64-c.o"
+	cxx_target_objs="arc64-c.o"
+	;;
 arm*-*-*)
 	cpu_type=arm
 	extra_objs="arm-builtins.o arm-mve-builtins.o aarch-common.o"
@@ -1192,7 +1197,7 @@ alpha*-dec-*vms*)
 	tm_file="${tm_file} vms/vms.h alpha/vms.h"
 	tmake_file="${tmake_file} alpha/t-vms alpha/t-alpha"
 	;;
-arc*-*-elf*)
+arc-*-elf* | arceb-*-elf*)
 	tm_file="arc/arc-arch.h dbxelf.h elfos.h newlib-stdint.h arc/elf.h ${tm_file}"
 	tmake_file="arc/t-multilib arc/t-arc"
 	extra_gcc_objs="driver-arc.o"
@@ -1213,7 +1218,7 @@ arc*-*-elf*)
 	big*)	tm_file="arc/big.h ${tm_file}"
 	esac
 	;;
-arc*-*-linux*)
+arc-*-linux* | arceb-*-linux*)
 	tm_file="arc/arc-arch.h dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h arc/linux.h ${tm_file}"
 	tmake_file="${tmake_file} arc/t-multilib-linux arc/t-arc"
 	extra_gcc_objs="driver-arc.o"
@@ -1237,6 +1242,28 @@ arc*-*-linux*)
 	# automatically detect that GAS supports it, yet we require it.
 	gcc_cv_initfini_array=yes
 	;;
+arc32-*-elf*)
+	tm_file="dbxelf.h elfos.h newlib-stdint.h arc64/elf.h arc64/elf32.h ${tm_file}"
+	tmake_file="${tmake_file} arc64/t-multilib32 arc64/t-arc64"
+	;;
+arc64-*-elf*)
+	tm_file="dbxelf.h elfos.h newlib-stdint.h arc64/elf.h arc64/elf64.h ${tm_file}"
+	tmake_file="${tmake_file} arc64/t-multilib arc64/t-arc64"
+	;;
+arc32-*-linux*)
+	tm_file="dbxelf.h elfos.h gnu-user.h linux.h arc64/linux.h arc64/linux32.h linux-android.h glibc-stdint.h ${tm_file}"
+	tmake_file="${tmake_file} arc64/t-arc64"
+	# Force .init_array support.  The configure script cannot always
+	# automatically detect that GAS supports it, yet we require it.
+	gcc_cv_initfini_array=yes
+	;;
+arc64-*-linux*)
+	tm_file="dbxelf.h elfos.h gnu-user.h linux.h arc64/linux.h arc64/linux64.h linux-android.h glibc-stdint.h ${tm_file}"
+	tmake_file="${tmake_file} arc64/t-arc64"
+	# Force .init_array support.  The configure script cannot always
+	# automatically detect that GAS supports it, yet we require it.
+	gcc_cv_initfini_array=yes
+	;;
 arm-wrs-vxworks7*)
 	# We only support VxWorks 7 now on ARM, post SR600.  Pre SR600
 	# VxWorks 7 was transitory and major versions prior to 7 were based
@@ -4311,7 +4338,7 @@ case "${target}" in
 	done
 	;;
 
-	arc*-*-*)
+	arc-*-* | arceb-*-*)
 		supported_defaults="cpu fpu"
 
 		new_cpu=hs38_linux
@@ -4360,6 +4387,29 @@ case "${target}" in
 		fi
 		;;
+	arc[6432]*-*-*)
+		supported_defaults="fpu cpu"
+
+		case "$with_fpu" in
+		"" | fpus | fpud)
+			# OK
+			;;
+		*)
+			echo "Unknown floating point type used in "\
+				"--with-fpu=$with_fpu" 1>&2
+			exit 1
+			;;
+		esac
+
+		case "$with_cpu" in
+		"" | hs5* | hs6*)
+			# OK
+			;;
+		*)
+			echo "Unknown cpu used in --with-cpu=$with_cpu" 1>&2
+			exit 1
+		esac
+		;;
 
 	avr-*-*)
 		# Handle --with-multilib-list.
if test "x${with_multilib_list}" != xdefault; then diff --git a/gcc/config/arc64/arc32.md b/gcc/config/arc64/arc32.md new file mode 100644 index 0000000000000..12fd743215638 --- /dev/null +++ b/gcc/config/arc64/arc32.md @@ -0,0 +1,101 @@ +(define_mode_attr vectab [(V2HI "") (V4HI "d") (V2SI "d")]) +(define_mode_attr vmvtab [(V2HI "add") (V4HI "vadd2") (V2SI "vadd2")]) + +;; ARCv3:32 specific instructions. + +(define_insn_and_split "*arc32_movdi" + [(set (match_operand:DI 0 "arc64_dest_operand" "=r,r,Ustor") + (match_operand:DI 1 "nonimmediate_operand" "r,m,r"))] + "!TARGET_64BIT + && (register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "@ + vadd2\\t%0,%1,0 + ldd%U1\\t%0,%1 + std%U0\\t%1,%0" + "&& reload_completed && arc64_split_double_move_p (operands, DImode)" + [(const_int 0)] + { + arc64_split_double_move (operands, DImode); + DONE; + } + [(set_attr "type" "vadd,ld,st") + (set_attr "length" "4,*,*")]) + +(define_insn_and_split "*arc32_mov" + [(set (match_operand:VALL 0 "arc64_dest_operand" "=r,r,Ustor") + (match_operand:VALL 1 "nonimmediate_operand" "r,m,r"))] + "!TARGET_64BIT && TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + "@ + \\t%0,%1,0 + ld%U1\\t%0,%1 + st%U0\\t%1,%0" + "&& reload_completed && arc64_split_double_move_p (operands, mode)" + [(const_int 0)] + { + arc64_split_double_move (operands, mode); + DONE; + } + [(set_attr "type" "vadd,ld,st") + (set_attr "length" "4,*,*")]) + +(define_insn "arc32_vmach_hi" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (plus:V2SI + (mult:V2SI + (ANY_EXTEND:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)]))) + (ANY_EXTEND:V2SI + (vec_select:V2HI (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)])))) + (reg:V2SI R58_REGNUM))) + (clobber (reg:V2SI R58_REGNUM))] + "TARGET_SIMD && !TARGET_64BIT" + "vmac2h%?\\t%0,%H1,%H2" + [(set_attr "length" "4") + (set_attr "type" "vmac2h")]) + + (define_insn "arc32_vmpy2h_hi" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (mult:V2SI + (ANY_EXTEND:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)]))) + (ANY_EXTEND:V2SI + (vec_select:V2HI + (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)]))))) + (clobber (reg:V2SI R58_REGNUM))] + "TARGET_SIMD && !TARGET_64BIT" + "vmpy2h\\t%0,%H1,%H2" + [(set_attr "length" "4") + (set_attr "type" "vmpy2h")]) + +(define_insn_and_split "v2si3" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (LSHIFT:V2SI (match_operand:V2SI 1 "register_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "ri")))] + "!TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 3) (LSHIFT:SI (match_dup 4) (match_dup 2))) + (set (match_dup 5) (LSHIFT:SI (match_dup 6) (match_dup 2)))] + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + operands[4] = gen_lowpart (SImode, operands[1]); + operands[6] = gen_highpart (SImode, operands[1]); + if (REG_P (operands[2]) + && REGNO (operands[2]) == REGNO (operands[3])) + { + std::swap (operands[3], operands[5]); + std::swap (operands[4], operands[6]); + } + } + [(set_attr "length" "8") + (set_attr "type" "")]) + diff --git a/gcc/config/arc64/arc64-c.cc b/gcc/config/arc64/arc64-c.cc new file mode 100644 index 0000000000000..4190774ed0bf8 --- /dev/null +++ b/gcc/config/arc64/arc64-c.cc @@ 
-0,0 +1,84 @@ +/* Copyright (C) 2016-2019 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . +*/ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "memmodel.h" +#include "tm_p.h" +#include "cpplib.h" +#include "c-family/c-common.h" +#include "target.h" + +#define builtin_define(TXT) cpp_define (pfile, TXT) +#define builtin_assert(TXT) cpp_assert (pfile, TXT) + +/* Define or undefine macros based on the current target. */ + +static void +def_or_undef_macro (cpp_reader* pfile, const char *name, bool def_p) +{ + if (def_p) + cpp_define (pfile, name); + else + cpp_undef (pfile, name); +} + +/* Helper for TARGET_CPU_CPP_BUILTINS hook. */ + +void +arc64_cpu_cpp_builtins (cpp_reader * pfile) +{ + builtin_assert ("cpu=arc64"); + builtin_assert ("machine=arc64"); + + builtin_define ("__ARC64__"); + builtin_define ("__LITTLE_ENDIAN__"); + builtin_define ("__ARCV3__"); + + if (arc64_cmodel_var == ARC64_CMODEL_SMALL) + builtin_define ("__ARC64_CMODEL_SMALL__"); + else if (arc64_cmodel_var == ARC64_CMODEL_MEDIUM) + builtin_define ("__ARC64_CMODEL_MEDIUM__"); + else if (arc64_cmodel_var == ARC64_CMODEL_LARGE) + builtin_define ("__ARC64_CMODEL_LARGE__"); + + if (TARGET_HARD_FLOAT) + { + builtin_define ("__arc_hard_float__"); + builtin_define ("__ARC_HARD_FLOAT__"); + builtin_define ("__ARC_FLOAT_ABI_HARD__"); + } + else + { + builtin_define ("__arc_soft_float__"); + builtin_define ("__ARC_SOFT_FLOAT__"); + } + +#undef ARC64_C_DEF +#define ARC64_C_DEF(NAME, CONDITION) \ + def_or_undef_macro (pfile, NAME, CONDITION); + +#include "arc64-c.def" +#undef ARC64_C_DEF +} diff --git a/gcc/config/arc64/arc64-c.def b/gcc/config/arc64/arc64-c.def new file mode 100644 index 0000000000000..7f50ec3826e2d --- /dev/null +++ b/gcc/config/arc64/arc64-c.def @@ -0,0 +1,35 @@ +/* Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
+*/ + +ARC64_C_DEF ("__ARC64_ATOMIC_1__", arc64_atomic_option == 1) +ARC64_C_DEF ("__ARC64_ATOMIC_2__", arc64_atomic_option == 2) +ARC64_C_DEF ("__ARC64_ATOMIC_3__", arc64_atomic_option == 3) +ARC64_C_DEF ("__ARC64_UNALIGNED__", unaligned_access) +ARC64_C_DEF ("__ARC_FPU_SP__", ARC64_HAS_FPUS) +ARC64_C_DEF ("__ARC_FPU_DP__", ARC64_HAS_FPUD) +ARC64_C_DEF ("__ARC64_LL64__", TARGET_LL64) +ARC64_C_DEF ("__ARC64_M128__", TARGET_WIDE_LDST) +ARC64_C_DEF ("__ARC64_WIDE_VECTOR__", TARGET_WIDE_SIMD) +ARC64_C_DEF ("__ARC64_ARCH64__", TARGET_64BIT) +ARC64_C_DEF ("__ARC64_ARCH32__", !TARGET_64BIT) + + +/* Local Variables: */ +/* mode: c */ +/* End: */ diff --git a/gcc/config/arc64/arc64-modes.def b/gcc/config/arc64/arc64-modes.def new file mode 100644 index 0000000000000..7eeb80a881147 --- /dev/null +++ b/gcc/config/arc64/arc64-modes.def @@ -0,0 +1,38 @@ +/* Machine description for arc64 architecture. + Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +CC_MODE (CC_ZN); /* Only N and Z bits of condition flags are valid. */ +CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ +CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */ +CC_MODE (CC_V); /* Only V bit of condition flag is valid. */ +CC_MODE (CC_FPU); +CC_MODE (CC_FPUE); + +/* Half-precision floating point for __fp16. */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, &ieee_half_format); + +/* Int vector modes. */ +VECTOR_MODES (INT, 4); /* V2HI */ +VECTOR_MODES (INT, 8); /* V4HI V2SI */ + +/* FP vector modes. */ +VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ diff --git a/gcc/config/arc64/arc64-opts.h b/gcc/config/arc64/arc64-opts.h new file mode 100644 index 0000000000000..fe188200598c5 --- /dev/null +++ b/gcc/config/arc64/arc64-opts.h @@ -0,0 +1,34 @@ +/* Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef ARC64_OPTS_H +#define ARC64_OPTS_H + +/* The code model defines the address generation strategy. */ +enum arc64_code_model { + /* Static code and data fit within a 1MB region. + The default non-PIC code model. */ + ARC64_CMODEL_SMALL, + /* The default for PIC code model, static code and data fit within + 4GB region. Local calls will fit within 16MB region. 
*/ + ARC64_CMODEL_MEDIUM, + /* No assumptions about addresses of code and data. */ + ARC64_CMODEL_LARGE +}; + +#endif /* ARC64_OPTS_H */ diff --git a/gcc/config/arc64/arc64-passes.def b/gcc/config/arc64/arc64-passes.def new file mode 100644 index 0000000000000..34cbbe3dd0aaf --- /dev/null +++ b/gcc/config/arc64/arc64-passes.def @@ -0,0 +1,21 @@ +/* Description of target passes for ARC64. + Copyright (C) 2021 Free Software Foundation, Inc. */ + +/* This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Re-run peephole2 before reorg. */ +INSERT_PASS_AFTER (pass_sched2, 1, pass_peephole2); diff --git a/gcc/config/arc64/arc64-protos.h b/gcc/config/arc64/arc64-protos.h new file mode 100644 index 0000000000000..1651fcad469d9 --- /dev/null +++ b/gcc/config/arc64/arc64-protos.h @@ -0,0 +1,55 @@ +#ifndef GCC_ARC64_PROTOS_H +#define GCC_ARC64_PROTOS_H + +extern int arc64_epilogue_uses (int); +extern int arc64_eh_uses (int); +extern HOST_WIDE_INT arc64_initial_elimination_offset (unsigned, unsigned); +extern void arc64_init_expanders (void); +extern void arc64_cpu_cpp_builtins (cpp_reader *); + +#ifdef RTX_CODE + +extern rtx arc64_return_addr (int, rtx); +extern machine_mode arc64_select_cc_mode (enum rtx_code, rtx, rtx); +extern bool arc64_can_use_return_insn_p (void); +extern void arc64_expand_call (rtx, rtx, bool); +extern rtx arc64_gen_compare_reg (enum rtx_code, rtx, rtx); +extern bool arc64_prepare_move_operands (rtx, rtx, machine_mode); +extern void arc64_expand_prologue (void); +extern void arc64_expand_epilogue (bool); +extern bool arc64_limm_addr_p (rtx); +extern bool arc64_is_long_call_p (rtx); +extern bool arc64_legitimate_store_address_p (machine_mode, rtx); +extern bool arc64_short_access_p (rtx, machine_mode, bool); +extern rtx arc64_eh_return_handler_rtx (void); +extern int arc64_asm_preferred_eh_data_format (int, int); + +extern bool arc64_check_mov_const (HOST_WIDE_INT); +extern bool arc64_split_mov_const (rtx *); +extern bool arc64_expand_cpymem (rtx *); + +extern void arc64_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void arc64_pre_atomic_barrier (enum memmodel); +extern void arc64_post_atomic_barrier (enum memmodel); +extern void arc64_expand_compare_and_swap (rtx []); +extern void arc64_split_compare_and_swap (rtx []); +extern bool arc64_allow_direct_access_p (rtx); +extern bool arc64_use_fp_regs (machine_mode); +extern bool arc64_fp_access_p (rtx, machine_mode); +extern void arc64_expand_casesi (rtx []); +extern bool arc64_split_double_move_p (rtx *, machine_mode); +extern void arc64_split_double_move (rtx *, machine_mode); +extern unsigned arc64_dbx_register_number (unsigned); +extern bool arc64_expand_fvect_shr (rtx *); +extern bool arc64_use_plt34_p (rtx); +extern int regno_clobbered_p (unsigned int, rtx_insn *, machine_mode, int); +extern void arc64_gen_unlikely_cbranch (enum rtx_code, machine_mode, rtx); +extern int accumulator_bypass_p (rtx_insn *, rtx_insn *); +extern int set_accumulator_p 
(rtx_insn *, rtx_insn *); +extern const char *arc64_output_return (void); +extern bool arc64_hard_regno_rename_ok (unsigned, unsigned); +extern void arc64_expand_vector_init (rtx, rtx); + +#endif /* RTX_CODE */ + +#endif /* GCC_ARC64_PROTOS_H */ diff --git a/gcc/config/arc64/arc64.cc b/gcc/config/arc64/arc64.cc new file mode 100644 index 0000000000000..39704c131f405 --- /dev/null +++ b/gcc/config/arc64/arc64.cc @@ -0,0 +1,6767 @@ +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "memmodel.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "cfghooks.h" +#include "df.h" +#include "tm_p.h" +#include "stringpool.h" +#include "attribs.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "diagnostic.h" +#include "fold-const.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "explow.h" +#include "expr.h" +#include "langhooks.h" +#include "tm-constrs.h" +#include "cfgrtl.h" +#include "tree-pass.h" +#include "context.h" +#include "builtins.h" +#include "rtl-iter.h" +#include "alias.h" +#include "opts.h" +#include "dwarf2.h" +#include "hw-doloop.h" + +/* This file should be included last. */ +#include "target-def.h" + +/* Return true if REGNO is suited for short instructions. */ +#define COMPACT_REG_P(REGNO) \ + (((signed)(REGNO) >= R0_REGNUM && (REGNO) <= R3_REGNUM) \ + || ((REGNO) >= R12_REGNUM && (REGNO) <= R15_REGNUM)) + +/* Use ARC64_LPIC only if dealing with 64-bit variant of arc64. */ +#define ARC64_MAYBE_LPIC (TARGET_64BIT ? ARC64_LPIC : ARC64_PIC) +#define ARC64_MAYBE_LARGE (TARGET_64BIT ? ARC64_LARGE : ARC64_LO32) + +/* Maximum size of a loop. */ +#define MAX_LOOP_LENGTH 4094 +#define MIN_LOOP_LENGTH -4092 + +#define UNITS_PER_LIMM 4 + +#define DOUBLE_LOAD_STORE ((!TARGET_64BIT && TARGET_LL64) \ + || (TARGET_64BIT && TARGET_WIDE_LDST)) + +/* Logic: + + HS5x (32-bit arch): + - no 64-bit loads and stores -> 32-bit moves + - use_fpu && fpu_exists -> fpr + - else -> gpr + - 64-bit loads and stores -> 64-bit moves + - use_fpu && fpu{s,d}_exists -> fpr + - else -> gpr + + HS6x (64-bit arch): + - no 128-bit loads and stores -> 64-bit moves + - use_fpu && fpu_exists -> fpr + - else -> gpr + - 128-bit loads and stores -> 128-bit moves + - use_fpu && fpud_exists -> fpr + - else -> gpr. */ + +static machine_mode cpymem_copy_mode (void) +{ + /* HS6x. */ + if (TARGET_64BIT) + { + if (!TARGET_WIDE_LDST) + { + if (TARGET_FP_MOVE && ARC64_HAS_FPUD) + return DFmode; + else if (TARGET_FP_MOVE && ARC64_VFP_64) + return V2SFmode; + + return DImode; + } + + if (TARGET_FP_MOVE) + { + if (ARC64_VFP_128) + return V2DFmode; + } + + return TImode; + } + /* HS5x. */ + else + { + if (!TARGET_LL64) + { + if (TARGET_FP_MOVE && ARC64_HAS_FPUS) + return SFmode; + + return SImode; + } + + if (TARGET_FP_MOVE) + { + /* ARC64_VFP_64 does not cover all cases YET. */ + if (ARC64_VFP_64) + return DFmode; + } + + return DImode; + } +} + +#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Implement REGNO_REG_CLASS. 
*/ +const enum reg_class arc64_regno_to_regclass[FIRST_PSEUDO_REGISTER] = + { + AC16_REGS, AC16_REGS, AC16_REGS, AC16_REGS, + CORE_REGS, CORE_REGS, CORE_REGS, CORE_REGS, + CORE_REGS, CORE_REGS, CORE_REGS, CORE_REGS, + AC16_REGS, AC16_REGS, AC16_REGS, AC16_REGS, + CORE_REGS, CORE_REGS, CORE_REGS, CORE_REGS, + CORE_REGS, CORE_REGS, CORE_REGS, CORE_REGS, + CORE_REGS, CORE_REGS, CORE_REGS, CORE_REGS, + CORE_REGS, NO_REGS, CORE_REGS, CORE_REGS, + + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + NO_REGS, NO_REGS, GENERAL_REGS, GENERAL_REGS, + NO_REGS, NO_REGS, NO_REGS, NO_REGS, + + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + FP_REGS, FP_REGS, FP_REGS, FP_REGS, + + GENERAL_REGS, GENERAL_REGS, NO_REGS, + }; + +enum arc_cc_code_index +{ + ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N, + ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV, + ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ, + ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC +}; + +typedef enum arc64_symb_type +{ + ARC64_UNK = 0, ARC64_LO32, ARC64_LARGE, ARC64_PIC, ARC64_LPIC, ARC64_TLS, + ARC64_PCREL +} arc64_symb; + +/* Information about single argument. */ +struct arc64_arg_info { + /* Number of integer registers allocated to this argument. */ + unsigned int ngpr; + /* Number of floating-point registers allocated to this argument. */ + unsigned int nfpr; + + /* Offset. */ + unsigned int off_gpr; + unsigned int off_fpr; + + /* Goes on stack. */ + bool stack_p; +}; + +/* Frame and machine specific info. */ + +struct GTY (()) arc64_frame +{ + HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; + + /* The size of the saved callee-save int/FP registers. */ + HOST_WIDE_INT saved_regs_size; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the + frame. This value is rounded up to a multiple of + STACK_BOUNDARY. */ + HOST_WIDE_INT saved_varargs_size; + + HOST_WIDE_INT saved_outargs_size; + + HOST_WIDE_INT saved_locals_size; + + /* The size of the frame. This value is the offset from base of the + frame (incomming SP) to the stack_pointer. This value is always + a multiple of STACK_BOUNDARY. */ + HOST_WIDE_INT frame_size; + + bool layout_p; +}; + + +/* ARC64 function types. */ +enum arc_function_type { + /* No function should have the unknown type. This value is used to + indicate the that function type has not yet been computed. */ + ARC64_FUNCTION_UNKNOWN = 0, + + /* The normal function type indicates that the function has the + standard prologue and epilogue. */ + ARC64_FUNCTION_NORMAL = 1L << 0, + + /* These are interrupt handlers. The name corresponds to the register + name that contains the return address. */ + ARC64_FUNCTION_ILINK = 1L << 1, + + /* The naked function type indicates that the function does not have + prologue or epilogue, and that no stack frame is available. */ + ARC64_FUNCTION_NAKED = 1L << 2 +}; + +/* Check if a function is an interrupt function. */ +#define ARC_INTERRUPT_P(TYPE) (((TYPE) & ARC64_FUNCTION_ILINK) != 0) + +/* Check if a function is normal, that is, has standard prologue and + epilogue. 
*/
+#define ARC_NORMAL_P(TYPE) (((TYPE) & ARC64_FUNCTION_NORMAL) != 0)
+
+/* Check if a function is naked.  */
+#define ARC_NAKED_P(TYPE) (((TYPE) & ARC64_FUNCTION_NAKED) != 0)
+
+typedef struct GTY (()) machine_function
+{
+  struct arc64_frame frame;
+  /* Record if the function has a variable argument list.  */
+  int uses_anonymous_args;
+  /* Record the type of the current function.  */
+  unsigned int fn_type;
+} machine_function;
+
+/* IDs for all the ARC builtins.  */
+
+enum arc64_builtin_id
+  {
+#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)	\
+  ARC64_BUILTIN_ ## NAME,
+#include "builtins.def"
+#undef DEF_BUILTIN
+
+   ARC64_BUILTIN_COUNT
+  };
+
+struct GTY(()) arc64_builtin_description
+{
+  enum insn_code icode;
+  int n_args;
+  tree fndecl;
+};
+
+static GTY(()) struct arc64_builtin_description
+arc_bdesc[ARC64_BUILTIN_COUNT] =
+{
+#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)		\
+  { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE },
+#include "builtins.def"
+#undef DEF_BUILTIN
+};
+
+/* vec_perm support.  */
+struct e_vec_perm_d
+{
+  rtx target, op0, op1;
+  vec_perm_indices perm;
+  machine_mode vmode;
+  bool one_vector_p;
+  bool testing_p;
+};
+
+static tree arc64_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree arc64_interrupt_attribute (tree *, tree, tree, int, bool *);
+
+/* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+     affects_type_identity, handler, exclude }  */
+const struct attribute_spec arc64_attribute_table[] =
+{
+  /* Functions used as ISRs; the return address is held in the ILINK reg.  */
+  { "interrupt", 0, 1, false, true, true, false, arc64_interrupt_attribute,
+    NULL },
+
+  /* Functions which do not have the prologue and epilogue generated
+     by the compiler.  */
+  { "naked", 0, 0, true, false, false, false, arc64_fndecl_attribute,
+    NULL },
+
+  { NULL, 0, 0, false, false, false, false, NULL, NULL }
+};
+
+/* Local variable, true if we output a scaled address.  */
+static bool scalled_p = false;
+/* Simple LUT for log2.  */
+static const int lutlog2[] = {0, 0, 1, 0, 2, 0, 0, 0,
+			      3, 0, 0, 0, 0, 0, 0, 0 };
+
+/* Safe access to the log2 LUT.  */
+#define ARC64LOG2(X) (((X) > 15) ? 3 : lutlog2[((X) & 0x0f)])
+
+/* Check if an offset is scaled.  */
+#define ARC64_CHECK_SCALLED_IMMEDIATE(offset, mode)			\
+  (ARC64LOG2 (GET_MODE_SIZE (mode))					\
+   && VERIFY_SHIFT (INTVAL (offset), ARC64LOG2 (GET_MODE_SIZE (mode)))	\
+   && SIGNED_INT9 (INTVAL (offset) >> ARC64LOG2 (GET_MODE_SIZE (mode))))
+
+/* ALIGN FRAMES on word boundaries.  */
+#define ARC64_STACK_ALIGN(LOC)						\
+  (((LOC) + STACK_BOUNDARY / BITS_PER_UNIT - 1) & -STACK_BOUNDARY/BITS_PER_UNIT)
+
+
+/* Callback function used for function attributes.  */
+
+static tree
+arc64_fndecl_attribute (tree *node, tree name,
+			tree args ATTRIBUTE_UNUSED,
+			int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+    {
+      warning (OPT_Wattributes, "%qE attribute only applies to functions",
+	       name);
+      *no_add_attrs = true;
+    }
+
+  return NULL_TREE;
+}
+
+/* Handle an "interrupt" attribute; arguments as in
+   struct attribute_spec.handler.
*/ + +static tree +arc64_interrupt_attribute (tree *, tree name, tree args, int, + bool *no_add_attrs) +{ + + if (is_attribute_p ("interrupt", name)) + { + if (args) + { + tree value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", + name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (value), "ilink")) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not \"ilink\"", + name); + *no_add_attrs = true; + } + } + } + return NULL_TREE; +} + +/* ARC64 stack frame generated by this compiler looks like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ <-- incoming stack pointer (aligned) + | | + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ <-- arg_pointer_rtx + | | + | GPR save area | + | | + +-------------------------------+ + | Return address register | + | (if required) | + +-------------------------------+ + | FP (if required) | + +-------------------------------+ <-- (hard) frame_pointer_rtx + | | + | Local variables | + | | + +-------------------------------+ + | outgoing stack arguments | + | | + +-------------------------------+ <-- stack_pointer_rtx (aligned) + + Dynamic stack allocations such as alloca insert data after local + variables. */ + +/* Return TRUE if a register needs to be saved, exception making + BLINK, and FP registers. BLINK is never check by this routine, + while FP is only checked if `frame_pointer_required` is FALSE. */ + +static bool +arc64_save_reg_p (int regno) +{ + bool call_saved; + bool might_clobber; + bool eh_needed; + + gcc_assert (regno <= F31_REGNUM); + gcc_assert (regno >= R0_REGNUM); + + switch (regno) + { + case R60_REGNUM: + case R61_REGNUM: + case R62_REGNUM: + case R63_REGNUM: + case ILINK_REGNUM: + case BLINK_REGNUM: + case SP_REGNUM: + /* Special registers, they are handled separately. */ + return false; + + case R27_REGNUM: + if (frame_pointer_needed) + return false; + break; + + case F0_REGNUM: + case F1_REGNUM: + case F2_REGNUM: + case F3_REGNUM: + case F4_REGNUM: + case F5_REGNUM: + case F6_REGNUM: + case F7_REGNUM: + case F8_REGNUM: + case F9_REGNUM: + case F10_REGNUM: + case F11_REGNUM: + case F12_REGNUM: + case F13_REGNUM: + case F14_REGNUM: + case F15_REGNUM: + case F16_REGNUM: + case F17_REGNUM: + case F18_REGNUM: + case F19_REGNUM: + case F20_REGNUM: + case F21_REGNUM: + case F22_REGNUM: + case F23_REGNUM: + case F24_REGNUM: + case F25_REGNUM: + case F26_REGNUM: + case F27_REGNUM: + case F28_REGNUM: + case F29_REGNUM: + case F30_REGNUM: + case F31_REGNUM: + if (!ARC64_HAS_FP_BASE) + return false; + break; + + default: + break; + } + + call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno); + might_clobber = df_regs_ever_live_p (regno) || crtl->saves_all_registers; + + /* In a frame that calls __builtin_eh_return two data registers are used to + pass values back to the exception handler. Ensure that these registers are + spilled to the stack so that the exception throw code can find them, and + update the saved values. The handling code will then consume these + reloaded values to handle the exception. */ + eh_needed = crtl->calls_eh_return + && (EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM); + + if ((call_saved && might_clobber) || eh_needed) + return true; + + /* If this is an interrupt handler, then we must save extra registers. 
*/ + if (ARC_INTERRUPT_P (cfun->machine->fn_type)) + { + /* ARCv3 has ACCUMULATOR register as baseline. */ + if (regno == R58_REGNUM) + return true; + + if (df_regs_ever_live_p (regno) + /* if this is not a leaf function, then we must save all temporary + registers. */ + || (!crtl->is_leaf && call_used_regs[regno] && !fixed_regs[regno])) + return true; + } + return false; +} + +/* Compute the frame info. */ + +static void +arc64_compute_frame_info (void) +{ + int regno; + HOST_WIDE_INT offset = 0; + struct arc64_frame *frame = &cfun->machine->frame; + + gcc_assert (!frame->layout_p); + + memset (frame, 0, sizeof (*frame)); + + if (!ARC_NAKED_P(cfun->machine->fn_type)) + { + /* Find out which GPR need to be saved. */ + for (regno = R0_REGNUM, offset = 0; + regno <= F31_REGNUM; + regno++) + if (arc64_save_reg_p (regno)) + { + /* TBI: probably I need to make the saving of the FP registers + separate bulk from GPIs such that I can use latter on enter/leave + instruction seamlessly (i.e. first save FPregs/latter GPI, the + leave return feature will not work). */ + /* TBI: the FPUS only configuration is having only 32bit registers, + thus I can stack 2 FP registers in one stack slot ;). */ + frame->reg_offset[regno] = offset; + offset += UNITS_PER_WORD; + } + else + frame->reg_offset[regno] = -1; + + /* Check if we need to save the return address. */ + if (!crtl->is_leaf + || df_regs_ever_live_p (BLINK_REGNUM) + || crtl->calls_eh_return) + { + frame->reg_offset[BLINK_REGNUM] = offset; + offset += UNITS_PER_WORD; + } + + /* Check if we need frame pointer. It is mutual exclusive with + arc64_save_reg_p call. */ + if (frame_pointer_needed) + { + frame->reg_offset[R27_REGNUM] = offset; + offset += UNITS_PER_WORD; + } + } + + /* 1. At the bottom of the stack are any outgoing stack + arguments. */ + frame->saved_outargs_size = ARC64_STACK_ALIGN (crtl->outgoing_args_size); + + /* 2. Size of locals and temporaries. */ + frame->saved_locals_size = ARC64_STACK_ALIGN (get_frame_size ()); + + /* 3. Size of the saved registers (including FP/BLINK). + FIXME! FPR registers. */ + frame->saved_regs_size = ARC64_STACK_ALIGN (offset); + + /* 4. Size of the callee-allocated area for pretend stack + arguments. */ + frame->saved_varargs_size = ARC64_STACK_ALIGN (crtl->args.pretend_args_size); + + /* Total size. */ + frame->frame_size = frame->saved_outargs_size + frame->saved_locals_size + + frame->saved_regs_size + frame->saved_varargs_size; + + gcc_assert (frame->frame_size == ARC64_STACK_ALIGN (frame->frame_size)); + frame->layout_p = reload_completed; +} + +/* Emit a frame insn which adjusts stack pointer by OFFSET. */ + +static void +frame_stack_add (HOST_WIDE_INT offset) +{ + rtx tmp; + HOST_WIDE_INT lo = sext_hwi (offset, 32); + unsigned HOST_WIDE_INT hi = sext_hwi (offset >> 32, 32); + + if (hi != 0xffffffffULL || hi != 0ULL) + tmp = gen_rtx_SET (stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + gen_rtx_HIGH (Pmode, GEN_INT (hi)))); + + tmp = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, lo)); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + add_reg_note (tmp, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + offset))); +} + +/* Helper for prologue: emit frame store with pre_modify or pre_dec to + save register REG on stack. An initial offset OFFSET can be passed + to the function. If a DISPLACEMENT is defined, it will be used to + generate pre_modify instead of pre_dec. 
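+   For example, with OFFSET == 0 and DISPLACEMENT == 16, a 64-bit register
+   REG is stored through the address
+   (mem:DI (pre_modify (reg sp) (plus (reg sp) (const_int -16)))),
+   i.e. SP is first dropped by 16 bytes and REG is written at the new top
+   of the stack.  With both OFFSET and DISPLACEMENT zero, a plain
+   (pre_dec (reg sp)) address is used instead.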
*/ + +static HOST_WIDE_INT +frame_save_reg (rtx reg, HOST_WIDE_INT offset, HOST_WIDE_INT displacement) +{ + rtx addr, tmp; + + if (offset) + { + tmp = plus_constant (Pmode, stack_pointer_rtx, + offset - GET_MODE_SIZE (GET_MODE (reg))); + addr = gen_frame_mem (GET_MODE (reg), + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + tmp)); + } + else if (displacement) + { + tmp = plus_constant (Pmode, stack_pointer_rtx, (-displacement)); + addr = gen_frame_mem (GET_MODE (reg), + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + tmp)); + } + else + addr = gen_frame_mem (GET_MODE (reg), gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + tmp = emit_move_insn (addr, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + + return (displacement ? displacement : GET_MODE_SIZE (GET_MODE (reg))) + - offset; +} + +/* ARC prologue saving regs routine. */ + +static HOST_WIDE_INT +arc64_save_callee_saves (void) +{ + struct arc64_frame *frame = &cfun->machine->frame; + machine_mode save_mode = DImode; + int regno; + HOST_WIDE_INT offset = -frame->saved_varargs_size; + HOST_WIDE_INT frame_allocated = 0; + rtx reg; + + for (regno = F31_REGNUM; regno >= R0_REGNUM; regno--) + { + HOST_WIDE_INT disp = 0; + if (frame->reg_offset[regno] == -1 + /* Hard frame pointer is saved in a different place. */ + || (frame_pointer_needed && regno == R27_REGNUM) + /* blink register is saved in a different place. */ + || (regno == BLINK_REGNUM)) + continue; + + save_mode = word_mode; + if (ARC64_HAS_FP_BASE && FP_REGNUM_P (regno)) + { + save_mode = ARC64_HAS_FPUD ? DFmode : SFmode; + disp = UNITS_PER_WORD; + } + else if (regno >= 1 + && (((regno - 1) % 2) == 0) + && (frame->reg_offset[regno - 1] != -1)) + { + /* Use 64-bit double stores for context saving. */ + if (!TARGET_64BIT && TARGET_LL64) + { + save_mode = DImode; + --regno; + } + /* Use 128-bit double stores for context saving. */ + else if (TARGET_64BIT && TARGET_WIDE_LDST) + { + save_mode = TImode; + --regno; + } + } + + reg = gen_rtx_REG (save_mode, regno); + frame_allocated += frame_save_reg (reg, offset, disp); + offset = 0; + } + + /* Save BLINK if required. */ + if (frame->reg_offset[BLINK_REGNUM] != -1) + { + reg = gen_rtx_REG (Pmode, BLINK_REGNUM); + frame_allocated += frame_save_reg (reg, offset, 0); + offset = 0; + } + + /* Save FP if required. */ + if (frame_pointer_needed) + { + frame_allocated += frame_save_reg (hard_frame_pointer_rtx, offset, 0); + offset = 0; + } + + /* Emit mov fp,sp, if required. */ + if (frame_pointer_needed) + { + rtx tmp = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (tmp) = 1; + } + + return frame_allocated; +} + +/* Helper for epilogue: emit frame load with post_modify or post_inc + to restore register REG from stack. The initial offset is passed + via OFFSET. 
*/ + +static HOST_WIDE_INT +frame_restore_reg (rtx reg, HOST_WIDE_INT displacement) +{ + rtx addr, insn, tmp; + + if (displacement) + { + tmp = plus_constant (Pmode, stack_pointer_rtx, displacement); + addr = gen_frame_mem (GET_MODE (reg), + gen_rtx_POST_MODIFY (Pmode, + stack_pointer_rtx, + tmp)); + } + else + addr = gen_frame_mem (GET_MODE (reg), + gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); + insn = emit_move_insn (reg, addr); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, reg); + + if (reg == hard_frame_pointer_rtx) + add_reg_note (insn, REG_CFA_DEF_CFA, + plus_constant (Pmode, stack_pointer_rtx, + GET_MODE_SIZE (GET_MODE (reg)))); + else + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + GET_MODE_SIZE (GET_MODE (reg))))); + + return displacement ? displacement : GET_MODE_SIZE (GET_MODE (reg)); +} + +/* ARC' epilogue restore regs routine. */ + +static HOST_WIDE_INT +arc64_restore_callee_saves (bool sibcall_p ATTRIBUTE_UNUSED) +{ + struct arc64_frame *frame = &cfun->machine->frame; + HOST_WIDE_INT offset, frame_deallocated = 0; + rtx reg; + int regno; + machine_mode restore_mode = DImode; + + /* Recover the frame_pointer location for the current frame. */ + offset = frame->frame_size - (frame->saved_regs_size + + frame->saved_varargs_size); + + /* Emit mov sp,fp if need. Thus, we get rid of the offset without + using a possible expensive add3 instruction. */ + if (frame_pointer_needed) + { + rtx tmp = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (tmp) = 1; + } + else if (offset) + frame_stack_add (offset); + + frame_deallocated += offset; + + if (frame_pointer_needed) + frame_deallocated += frame_restore_reg (hard_frame_pointer_rtx, 0); + + if (frame->reg_offset[BLINK_REGNUM] != -1) + { + reg = gen_rtx_REG (Pmode, BLINK_REGNUM); + frame_deallocated += frame_restore_reg (reg, 0); + } + + for (regno = R0_REGNUM; regno <= F31_REGNUM; regno++) + { + HOST_WIDE_INT disp = 0; + bool double_load_p = false; + + if (frame->reg_offset[regno] == -1 + /* Hard frame pointer has been restored. */ + || (frame_pointer_needed && regno == R27_REGNUM) + /* blink register has been restored. */ + || (regno == BLINK_REGNUM)) + continue; + + restore_mode = word_mode; + if (ARC64_HAS_FP_BASE && FP_REGNUM_P (regno)) + { + restore_mode = ARC64_HAS_FPUD ? DFmode : SFmode; + disp = UNITS_PER_WORD; + } + else if ((regno % 2) == 0 + && (!frame_pointer_needed || ((regno + 1) != R27_REGNUM)) + && (frame->reg_offset[regno + 1] != -1 + && ((regno + 1) != BLINK_REGNUM))) + { + /* Use 64-bit double loads for context restoring. */ + if (!TARGET_64BIT && TARGET_LL64) + { + restore_mode = DImode; + double_load_p = true; + } + /* Use 128-bit double loads for context restoring. */ + else if (TARGET_64BIT && TARGET_WIDE_LDST) + { + restore_mode = TImode; + double_load_p = true; + } + } + + reg = gen_rtx_REG (restore_mode, regno); + frame_deallocated += frame_restore_reg (reg, disp); + + if (double_load_p) + regno++; + } + + return frame_deallocated; +} + +/* Emit an insn that's a simple single-set. Both the operands must be + known to be valid. */ +inline static rtx_insn * +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (x, y)); +} + +/* Given FROM and TO register numbers, say whether this elimination is allowed. + Frame pointer elimination is automatically handled. + + All eliminations are permissible. 
If we need a frame + pointer, we must eliminate ARG_POINTER_REGNUM into + FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ + +static bool +arc64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return ((to == HARD_FRAME_POINTER_REGNUM) || (to == STACK_POINTER_REGNUM)); +} + +/* We force all frames that call eh_return to require a frame pointer, this will + ensure that the previous frame pointer is stored on entry to the function, + and will then be reloaded at function exit. */ + +static bool +arc64_frame_pointer_required (void) +{ + return cfun->calls_alloca || crtl->calls_eh_return; +} + +/* Giving a symbol, return how it will be addressed. */ + +static arc64_symb +arc64_get_symbol_type (rtx x) +{ + bool is_local = false, is_tls = false; + + /* Labels are always local, so a short access will suffice. FIXME! + For large model, we should use a pc-rel accessing. */ + if (LABEL_REF_P (x)) + return flag_pic ? ARC64_PIC : + (arc64_cmodel_var == ARC64_CMODEL_LARGE ? ARC64_MAYBE_LARGE : + ARC64_LO32); + + /* FIXME! Maybe I should assert here. */ + if (!SYMBOL_REF_P (x)) + return ARC64_UNK; + + is_local = SYMBOL_REF_DECL (x) + ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) + : SYMBOL_REF_LOCAL_P (x); + is_tls = SYMBOL_REF_TLS_MODEL (x); + + if (is_tls) + return ARC64_TLS; + + if (!flag_pic) + switch (arc64_cmodel_var) + { + case ARC64_CMODEL_SMALL: + case ARC64_CMODEL_MEDIUM: + return ARC64_LO32; + case ARC64_CMODEL_LARGE: + return ARC64_MAYBE_LARGE; + default: + gcc_unreachable (); + } + else if (flag_pic == 1) + return is_local ? ARC64_PCREL : ARC64_PIC; + else if (flag_pic == 2) + return is_local ? ARC64_PCREL : ARC64_MAYBE_LPIC; + else + gcc_unreachable (); +} + +/* Helper legitimate address. Extra takes an input to discriminate + among load or store addresses. */ +static bool +arc64_legitimate_address_1_p (machine_mode mode, + rtx x, + bool strict ATTRIBUTE_UNUSED, + bool load_p, + bool scaling_p) +{ + if (REG_P (x)) + return true; + + if (CONST_INT_P (x)) + return true; + + if (CONSTANT_P (x)) + { + /* Don't allow constant + offset when we don't have native + ld/st, as the compiler may use very large offsets. These + memory accesses are splited anyhow. */ + if (GET_MODE_SIZE (mode) == UNITS_PER_WORD * 2) + { + /* 32-bit and no double loads? */ + if (!TARGET_64BIT && !TARGET_LL64) + return false; + /* 64-bit and no double loads? */ + if (TARGET_64BIT && !TARGET_WIDE_LDST) + return false; + /* fall thru */ + } + if (GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + /* Reloc addendum is only 32bit. */ + && UNSIGNED_INT32 (INTVAL (XEXP (XEXP (x, 0), 1)))) + x = XEXP (XEXP (x, 0), 0); + } + + if (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF) + return (arc64_get_symbol_type (x) == ARC64_LO32); + + /* Check register + offset address type. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + machine_mode scaling_mode = mode; + rtx offset = XEXP (x, 1); + HOST_WIDE_INT ioffset = INTVAL (offset); + + + if (GET_MODE_SIZE (scaling_mode) == 2 * UNITS_PER_WORD) + { + /* Double load/stores are not scaling with 128 bits but with the + register size. */ + scaling_mode = smallest_int_mode_for_size (BITS_PER_WORD); + + /* Adjust the offset as we may need to split this address. */ + if (ioffset > 0) + ioffset += UNITS_PER_WORD; + } + scaling_mode = scaling_p ? scaling_mode : QImode; + + /* ST instruction can only accept a single register plus a small s9 offset + as address. 
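+      The offset may also be matched in scaled form: assuming SIGNED_INT9
+      accepts the usual -256..255 range and VERIFY_SHIFT checks that the
+      low offset bits are clear, a 32-bit access with offset 1020 is
+      accepted (1020 >> 2 == 255 still fits the signed 9-bit field),
+      while an offset of 2048 is not and, for loads only, falls through
+      to the 32-bit large-immediate check below.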
*/ + if ((ARC64LOG2 (GET_MODE_SIZE (scaling_mode)) + && VERIFY_SHIFT (ioffset, ARC64LOG2 (GET_MODE_SIZE (scaling_mode))) + && SIGNED_INT9 (ioffset >> ARC64LOG2 (GET_MODE_SIZE (scaling_mode)))) + || SIGNED_INT9 (ioffset)) + return true; + + if (load_p + /* FIXME! we can use address scalling here to fit even more. */ + && (UNSIGNED_INT32 (INTVAL (offset)) + || SIGNED_INT32 (INTVAL (offset))) + && !optimize_size) + return true; + } + + /* Indexed addresses. */ + if (load_p + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1))) + { + if (GET_MODE_SIZE (mode) >= 2 * UNITS_PER_WORD) + { + if (!TARGET_64BIT) + return TARGET_LL64; + else + return TARGET_WIDE_LDST; + } + return true; + } + + /* Scalled addresses. Permitted variants: + ld.as rx, [rb,ri] addr = rb + ri * scaling + ld.as rx, [offset32, ri] addr = offset32 + ri * scalling + + The store address can have only immediate operands scalled. This + case toghether with its load variant are handled by above + code. */ + if (scaling_p + && load_p + && GET_CODE (x) == PLUS + && (REG_P (XEXP (x, 1)) || CONST_INT_P (XEXP (x, 1))) + /* Check multiplication. */ + && GET_CODE (XEXP (x, 0)) == MULT + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + /* x is plus(mult(index, scaling), base) => base + index*scaling */ + const rtx mult = XEXP (x, 0); + const int scaling = INTVAL (XEXP (mult, 1)); + + switch (GET_MODE_SIZE (mode)) + { + case 2: /* ldh */ + case 4: /* ld */ + if (scaling == GET_MODE_SIZE (mode)) + return true; + break; + case 8: /* ldd or ldl */ + if (scaling == 4) + return (!TARGET_64BIT && TARGET_LL64); + if (scaling == 8) + return TARGET_64BIT; + break; + case 16: /* lddl */ + if (scaling == 8) + return TARGET_WIDE_LDST; + break; + default: + break; + } + } + + if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) + && REG_P (XEXP (x, 0))) + return true; + + if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY)) + return arc64_legitimate_address_1_p (mode, XEXP (x, 1), strict, + load_p, false); + + /* PIC address (LARGE). */ + if (GET_CODE (x) == LO_SUM + && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == UNSPEC) + return true; + + /* PIC address (small) or local symbol. */ + if (load_p + && GET_CODE (x) == UNSPEC + && (XINT (x, 1) == ARC64_UNSPEC_GOT32 + || XINT (x, 1) == ARC64_UNSPEC_TLS_IE + || XINT (x, 1) == ARC64_UNSPEC_PCREL)) + return true; + + return false; +} + +/* Return TRUE if X is a legitimate address for accessing memory in + mode MODE. We do recognize addresses like: + - [Rb] + - [Rb, s9] + - [Rb, Ri] (ld only) + - [Rb, limm] (ld only) + - predec/postdec + - preinc/postinc + - premodif/postmodif +*/ + +static bool +arc64_legitimate_address_p (machine_mode mode, + rtx x, + bool strict ATTRIBUTE_UNUSED) +{ + /* Allow all the addresses accepted by load. */ + return arc64_legitimate_address_1_p (mode, x, strict, true, true); +} + +/* Helper for legitimate constant. */ +static bool +arc64_legitimate_constant1_p (machine_mode mode, rtx x, bool nosym) +{ + switch (GET_CODE (x)) + { + case CONST_DOUBLE: + case CONST_INT: + case CONST_WIDE_INT: + case HIGH: + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + return true; + + case SYMBOL_REF: + /* TODO: We should use arc64_get_symbol_type function here and retun + true/false depending on the type of the symbol. 
*/ + if (SYMBOL_REF_TLS_MODEL (x)) + return false; + if (nosym || flag_pic) + return false; + /* fallthrough */ + case LABEL_REF: + /* FIXME: Labels should be PC-rel when PIC, and make sure they are not + ending up in constant pool. */ + return true; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS) + { + rtx tmp = XEXP (x, 0); + /* Do not allow @symb + offset constants. */ + bool t1 = arc64_legitimate_constant1_p (mode, XEXP (tmp, 0), true); + bool t2 = arc64_legitimate_constant1_p (mode, XEXP (tmp, 1), true); + return (t1 && t2); + } + return false; + + default: + return false; + } +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants + that should be rematerialized rather than spilled. */ + +static bool +arc64_legitimate_constant_p (machine_mode mode, rtx x) +{ + return arc64_legitimate_constant1_p (mode, x, false); +} + +/* Giving a mode, return true if we can pass it in fp registers. */ + +bool +arc64_use_fp_regs (machine_mode mode) +{ + if (!FLOAT_MODE_P (mode)) + return false; + + /* FPU unit can have either 32 or 64 bit wide data path. */ + /* FIXME: Use macros for the sizes. */ + if ((ARC64_HAS_FPUS && (GET_MODE_SIZE (mode) == (UNITS_PER_WORD / 2))) + || (ARC64_HAS_FPUH && (GET_MODE_SIZE (mode) == (UNITS_PER_WORD / 4))) + || ARC64_HAS_FPUD) + return true; + return false; +} + +static rtx +arc64_gen_fp_pair (machine_mode mode, unsigned regno1, + machine_mode mode1, HOST_WIDE_INT offset1, + unsigned regno2, machine_mode mode2, + HOST_WIDE_INT offset2) +{ + return gen_rtx_PARALLEL + (mode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode1, regno1), + GEN_INT (offset1)), + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode2, regno2), + GEN_INT (offset2)))); +} + +static rtx +arc64_layout_arg (struct arc64_arg_info *info, cumulative_args_t pcum_v, + machine_mode mode, const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + HOST_WIDE_INT size; + unsigned int nregs; + bool record_p = type ? (TREE_CODE (type) == RECORD_TYPE) : false; + + memset (info, 0, sizeof (*info)); + info->off_fpr = pcum->fregs; + info->off_gpr = pcum->iregs; + + /* Find out the size of argument. */ + size = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + + /* When named, we can pass FP types into FP registers if they exists and they + have the right size, unless a record type is used. */ + if (named + && !record_p + && arc64_use_fp_regs (mode)) + { + size = ROUND_UP (size, UNITS_PER_FP_REG); + nregs = size / UNITS_PER_FP_REG; + + if (info->off_fpr + nregs <= MAX_ARC64_PARM_REGS) + { + int fregno = F0_REGNUM + info->off_fpr; + info->nfpr = nregs; + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* FIXME! for double-sized vectors, we may need to use double + register. */ + case MODE_FLOAT: + return gen_rtx_REG (mode, fregno); + + case MODE_COMPLEX_FLOAT: + gcc_assert (nregs == 2); + return arc64_gen_fp_pair (mode, fregno, GET_MODE_INNER (mode), 0, + fregno + 1, GET_MODE_INNER (mode), + GET_MODE_UNIT_SIZE (mode)); + + default: + gcc_unreachable (); + } + } + /* No free FP-reg, continue using R-regs for the remaining FP + arguments. */ + } + + size = ROUND_UP (size, UNITS_PER_WORD); + nregs = size / UNITS_PER_WORD; + + /* Partition the argument between register and stack. 
*/ + gcc_assert (info->nfpr == 0); + info->ngpr = MIN (nregs, MAX_ARC64_PARM_REGS - info->off_gpr); + info->stack_p = (nregs - info->ngpr) != 0; + + if (info->ngpr) + return gen_rtx_REG (mode, R0_REGNUM + info->off_gpr); + return NULL_RTX; +} + +/* Worker for return_in_memory. */ +/* FIXME! shall we use pass_by_reference? */ + +static bool +arc64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + + /* Maybe we may need to return simple scalar types in registers: + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != COMPLEX_TYPE) + return false; + */ + if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type)) + return true; + + size = int_size_in_bytes (type); + + /* Double sized float vectors are mapped into even-odd register + pair, hence use the stack when someone wants to pass them to + the caller. */ + if (VECTOR_FLOAT_TYPE_P (type) && size > UNITS_PER_WORD) + return true; + + /* Types larger than 2 registers returned in memory. */ + return ((size < 0) || (size > 2 * UNITS_PER_WORD)); +} + +/* Worker for pass_by_reference. */ + +static bool +arc64_pass_by_reference (cumulative_args_t cum_v, + const function_arg_info &arg) +{ + HOST_WIDE_INT size = arg.type_size_in_bytes (); + struct arc64_arg_info info; + CUMULATIVE_ARGS *pcum = get_cumulative_args (cum_v); + + /* Double sized fp-vectors are passed on the stack. */ + if (arg.type + && VECTOR_FLOAT_TYPE_P (arg.type) && size > UNITS_PER_WORD) + return true; + + /* N.B. std_gimplify_va_arg_expr passes NULL for cum. However, we + do not use variadic arguments in fp-regs. */ + if (pcum != NULL) + { + /* Check if we can use fp regs. */ + arc64_layout_arg (&info, cum_v, arg.mode, arg.type, arg.named); + if (info.nfpr) + return false; + } + + /* In earlier passes, the *_pass_by_reference() hook is called with the + "COMPLEX" as the "argument of the function" and later"COMPLEX.element" + is considered to be the "argument of the function". This check makes + a unified decision in all those scenarios. */ + if (COMPLEX_MODE_P (arg.mode)) + { + const machine_mode mode = GET_MODE_INNER (arg.mode); + size = GET_MODE_SIZE (mode); + } + + /* Variable sized arguments are always returned by reference, and + arguments which are variable sized or larger than 2 registers are + passed by reference. */ + return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD); +} + +/* The function to update the summarizer variable *CUM to advance past + an argument in the argument list. The values MODE, TYPE and NAMED + describe that argument. Once this is done, the variable *CUM is + suitable for analyzing the *following* argument with + `FUNCTION_ARG', etc. */ + +static void +arc64_function_arg_advance (cumulative_args_t pcum_v, + const function_arg_info &arg) +{ + struct arc64_arg_info info; + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + + arc64_layout_arg (&info, pcum_v, arg.mode, arg.type, arg.named); + + pcum->fregs = info.nfpr + info.off_fpr; + pcum->iregs = info.ngpr + info.off_gpr; +} + +/* Implement TARGET_ARG_PARTIAL_BYTES. */ + +static int +arc64_arg_partial_bytes (cumulative_args_t pcum_v, + const function_arg_info &arg) +{ + struct arc64_arg_info info; + + arc64_layout_arg (&info, pcum_v, arg.mode, arg.type, arg.named); + gcc_assert ((info.nfpr == 0) || (info.ngpr == 0)); + + return info.stack_p ? info.ngpr * UNITS_PER_WORD : 0; +} + +/* This function is used to control a function argument is passed in a + register, and which register. 
+ + The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes + (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE) + all of the previous arguments so far passed in registers; MODE, the + machine mode of the argument; TYPE, the data type of the argument + as a tree node or 0 if that is not known (which happens for C + support library functions); and NAMED, which is 1 for an ordinary + argument and 0 for nameless arguments that correspond to `...' in + the called function's prototype. + + The returned value should either be a `reg' RTX for the hard + register in which to pass the argument, or zero to pass the + argument on the stack. */ + +static rtx +arc64_function_arg (cumulative_args_t pcum_v, + const function_arg_info &arg) +{ + struct arc64_arg_info info; + + return arc64_layout_arg (&info, pcum_v, arg.mode, arg.type, arg.named); +} + +/* Define how to find the value returned by a function. VALTYPE is + the data type of the value (as a tree). If the precise function + being called is known, FN_DECL_OR_TYPE is its FUNCTION_DECL; + otherwise, FN_DECL_OR_TYPE is its type. */ + +static rtx +arc64_function_value (const_tree type, + const_tree func, + bool outgoing ATTRIBUTE_UNUSED) +{ + machine_mode mode = TYPE_MODE (type); + int unsignedp = TYPE_UNSIGNED (type); + + if (INTEGRAL_TYPE_P (type)) + mode = promote_function_mode (type, mode, &unsignedp, func, 1); + + if (arc64_use_fp_regs (mode)) + { + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* FIXME! for double-sized vectors, we may need to use double + register. */ + case MODE_FLOAT: + return gen_rtx_REG (mode, F0_REGNUM); + + case MODE_COMPLEX_FLOAT: + return arc64_gen_fp_pair (mode, F0_REGNUM, GET_MODE_INNER (mode), 0, + F1_REGNUM, GET_MODE_INNER (mode), + GET_MODE_UNIT_SIZE (mode)); + + default: + gcc_unreachable (); + } + + } + return gen_rtx_REG (mode, R0_REGNUM); +} + +/* Implements TARGET_FUNCTION_VALUE_REGNO_P. + Return true if REGNO is the number of a hard register in which the values + of called function may come back. */ + +static bool +arc64_function_value_regno_p (const unsigned int regno) +{ + /* Maximum of 16 bytes can be returned in the general registers. Examples + of 16-byte return values are: 128-bit integers and 16-byte small + structures (excluding homogeneous floating-point aggregates). + + We need to implement untyped_call instruction pattern when + returning more than one value. */ + + if (regno == R0_REGNUM) + return true; + + if (regno == F0_REGNUM) + return ARC64_HAS_FP_BASE; + + return false; +} + +static bool +arc64_split_complex_arg (const_tree) +{ + return true; +} + +/* Implement TARGET_SETUP_INCOMING_VARARGS. */ + +static void +arc64_setup_incoming_varargs (cumulative_args_t cum_v, + const function_arg_info &arg, + int *pretend_size, int no_rtl) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (cum_v); + int gpi_saved; + + /* The caller has advanced CUM up to, but not beyond, the last named + argumend. Advance a local copu of CUM past the last "real" named + argument, to find out how many registers are left over. 
*/ + arc64_function_arg_advance (pack_cumulative_args (&cum), arg); + + cfun->machine->uses_anonymous_args = 1; + if (!FUNCTION_ARG_REGNO_P (cum.iregs)) + return; + + gpi_saved = MAX_ARC64_PARM_REGS - cum.iregs; + + if (!no_rtl && gpi_saved > 0) + { + rtx ptr, mem; + ptr = plus_constant (Pmode, arg_pointer_rtx, 0); + mem = gen_frame_mem (BLKmode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + + move_block_from_reg (R0_REGNUM + cum.iregs, mem, gpi_saved); + } + + /* FIXME! do I need to ROUND_UP (pretend, STACK_BOUNDARY / + BITS_PER_UNIT) ? */ + *pretend_size = gpi_saved * UNITS_PER_WORD; +} + +/* Implement TARGET_HARD_REGNO_NREGS. */ + +static unsigned int +arc64_hard_regno_nregs (unsigned int regno, + machine_mode mode) +{ + if (FP_REGNUM_P (regno)) + return CEIL (GET_MODE_SIZE (mode), UNITS_PER_FP_REG); + return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); +} + +/* Implement TARGET_HARD_REGNO_MODE_OK. */ + +static bool +arc64_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return regno == CC_REGNUM; + + if (regno == SP_REGNUM + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + return (mode == Pmode); + + if (regno <= R58_REGNUM) + { + if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + return true; + else if (GET_MODE_SIZE (mode) <= (UNITS_PER_WORD * 2)) + return ((regno & 1) == 0); + } + else if (FLOAT_MODE_P (mode) && FP_REGNUM_P (regno)) + { + /* FIXME! I should make the decision base on the WIDE option + alone, if we need double regs or not. */ + if (ARC64_VFP_128 + && (GET_MODE_SIZE (mode) <= (UNITS_PER_FP_REG * 2)) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)) + return ((regno & 1) == 0); + return true; + } + + return false; +} + +/* Implement TARGET_MODES_TIEABLE_P. Tie QI/HI/SI/DI modes together. */ + +static bool +arc64_modes_tieable_p (machine_mode mode1, machine_mode mode2) +{ + if (GET_MODE_CLASS (mode1) == MODE_INT + && GET_MODE_CLASS (mode2) == MODE_INT + && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD + && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD) + return true; + + return false; +} + +static inline bool +arc64_short_insn_p (rtx_insn *insn) +{ + enum attr_iscompact iscompact; + + iscompact = get_attr_iscompact (insn); + if (iscompact == ISCOMPACT_YES) + return true; + + if (iscompact == ISCOMPACT_MAYBE) + return (get_attr_length (insn) == 2) + || (get_attr_length (insn) == 6); + + return (get_attr_length (insn) == 2); +} + +/* Returns the index of the ARC condition code string in + `arc_condition_codes'. COMPARISON should be an rtx like `(eq (...) + (...))'. */ + +static int +get_arc64_condition_code (rtx comparison) +{ + switch (GET_MODE (XEXP (comparison, 0))) + { + case E_DImode: /* brcc/bbit instructions. 
*/ + case E_SImode: + case E_CCmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GT : return ARC_CC_GT; + case LE : return ARC_CC_LE; + case GE : return ARC_CC_GE; + case LT : return ARC_CC_LT; + case GTU : return ARC_CC_HI; + case LEU : return ARC_CC_LS; + case LTU : return ARC_CC_LO; + case GEU : return ARC_CC_HS; + default : gcc_unreachable (); + } + case E_CC_ZNmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GE: return ARC_CC_P; + case LT: return ARC_CC_N; + case GT : return ARC_CC_PNZ; + default : gcc_unreachable (); + } + case E_CC_Zmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + default : gcc_unreachable (); + } + case E_CC_Cmode: + switch (GET_CODE (comparison)) + { + case LTU : return ARC_CC_C; + case GEU : return ARC_CC_NC; + default : gcc_unreachable (); + } + case E_CC_Vmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_NV; + case NE : return ARC_CC_V; + default : gcc_unreachable (); + } + case E_CC_FPUmode: + case E_CC_FPUEmode: + switch (GET_CODE (comparison)) + { + case EQ: return ARC_CC_EQ; + case NE: return ARC_CC_NE; + case GT: return ARC_CC_GT; + case GE: return ARC_CC_GE; + case LT: + /* Equivalent with N, short insn friendly. */ + return ARC_CC_C; + case LE: return ARC_CC_LS; + case UNORDERED: return ARC_CC_V; + case ORDERED: return ARC_CC_NV; + case UNGT: return ARC_CC_HI; + case UNGE: + /* Equivalent with NV, short insn friendly. */ + return ARC_CC_HS; + case UNLT: return ARC_CC_LT; + case UNLE: return ARC_CC_LE; + default: gcc_unreachable (); + } + break; + default : gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Address scaling is a bit tricky in case of double loads/stores. + In normal cases, the address scaling takes the element size + of the data it is handling as the offset. However, in case of + a double load/store the offset size is the same size of a single + element and not the double of it. e.g.: + + ldb.as r1, [r0, 1] offset is 1 (1*1), data is 1 byte + ldw.as r1, [r0, 1] offset is 2 (1*2), data is 2 bytes + ld.as r1, [r0, 1] offset is 4 (1*4), data is 4 bytes + ldl.as r1, [r0, 1] offset is 8 (1*8), data is 8 bytes + + ldd.as r1, [r0, 1] offset is 4 (1*4), data is 8 bytes + lddl.as r1, [r0, 1] offset is 8 (1*8), data is 16 bytes +*/ + +static machine_mode +arc64_get_effective_mode_for_address_scaling (const machine_mode mode) +{ + if (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2)) + { + gcc_assert (DOUBLE_LOAD_STORE); + return Pmode; + } + return mode; +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. CODE is + a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is + null. Letters `acln' are reserved. The acceptable formatting + commands given by CODE are: + '0': Print a normal operand, if it's a general register, + then we assume DImode. + 'U': Load/store update or scaling indicator. + 'm': output condition code without 'dot'. + 'M': output inverse condition code without 'dot'. + 'w': output proper condition code for emulated brcc with u6 immediate. + 'W': output proper condition code for emulated brcc with r/limm. + '?': Short instruction suffix. + '*': Delay slot suffix + 'L': Lower 32bit of immediate or symbol. + 'H': Higher 32bit of an immediate, 64b-register or symbol. + 'C': Constant address, switches on/off @plt. + 's': Scalled immediate. 
+ 'S': Scalled immediate, to be used in pair with 's'. + 'N': Negative immediate, to be used in pair with 's'. + 'V': 2x16b vector immediate, hi lane is zero. + 'P': Constant address, swithces on/off _s to be used with 'C' + 'A': output aq, rl or aq.rl flags for atomic ops. +*/ + +static void +arc64_print_operand (FILE *file, rtx x, int code) +{ + HOST_WIDE_INT ival; + const char * const arc_condition_codes[] = + { + "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv", + "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0 + }; + const char * const ebrcc_u6ccodes[] = + { + "na", "na", "na", "na", "na", "na", "na", "na", "na", "na", + "ge", "lt", "na", "na", "hs", "lo", "na", "na" + }; + const char * const ebrcc_rccodes[] = + { + "na", "na", "na", "na", "na", "na", "na", "na", "na", "na", + "lt", "ge", "na", "na", "lo", "hs", "na", "na" + }; + + int scalled = 0; + int sign = 1; + machine_mode effective_mode; + + switch (code) + { + case '*': + if (final_sequence && final_sequence->len () != 1) + { + rtx_insn *delay = final_sequence->insn (1); + + if (delay->deleted ()) + return; + fputs (".d", file); + } + return; + + case '?': + if (arc64_short_insn_p (current_output_insn)) + fputs ("_s", file); + break; + + case 'U' : + /* Output a load/store with update indicator if appropriate. */ + if (!MEM_P (x)) + { + output_operand_lossage ("invalid operand for %%U code"); + return; + } + + /* FIXME! consider volatile accesses as .di accesses, everything + under an option. */ + if (MEM_VOLATILE_P (x) && TARGET_VOLATILE_DI) + fputs (".di", file); + + switch (GET_CODE (XEXP (x, 0))) + { + case PRE_INC: + case PRE_DEC: + case PRE_MODIFY: + fputs (".a", file); + break; + + case POST_INC: + case POST_DEC: + case POST_MODIFY: + fputs (".ab", file); + break; + + case PLUS: + effective_mode = + arc64_get_effective_mode_for_address_scaling (GET_MODE (x)); + if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) + fputs (".as", file); + else if (REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && ARC64_CHECK_SCALLED_IMMEDIATE (XEXP (XEXP (x, 0), 1), + effective_mode)) + { + fputs (".as", file); + scalled_p = true; + } + default: + break; + } + break; + + case 'L': + if (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF) + { + output_addr_const (asm_out_file, x); + fputs ("@u32", file); + break; + } + else if (REG_P (x)) + { + asm_fprintf (file, "%s", reg_names [REGNO (x)]); + break; + } + else if (!CONST_INT_P (x)) + { + output_operand_lossage ("invalid operand for %%L code"); + return; + } + ival = INTVAL (x); + ival &= 0xffffffffULL; + fprintf (file,"0x%08" PRIx32, (uint32_t) ival); + break; + + case 'H': + if (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == UNSPEC) + { + output_addr_const (asm_out_file, x); + break; + } + else if (CONST_INT_P (x)) + { + ival = INTVAL (x); + ival >>= 32; + fprintf (file, "%d", (int32_t) ival); + } + else if (REG_P (x)) + asm_fprintf (file, "%s", reg_names [REGNO (x) + 1]); + else + { + output_operand_lossage ("invalid operand for %%H code"); + return; + } + break; + + case 'V': + if (!CONST_INT_P (x)) + { + output_operand_lossage ("invalid operand for %%V code"); + return; + } + ival = INTVAL (x); + ival &= 0xffffULL; + fprintf (file, "0x%08" PRIx32, (uint32_t) ival); + break; + + case 'm': + fputs (arc_condition_codes[get_arc64_condition_code (x)], file); + break; + + case 'M': + fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE + (get_arc64_condition_code (x))], file); + break; + + case 'w': + fputs 
(ebrcc_u6ccodes[get_arc64_condition_code (x)], file); + break; + + case 'W': + fputs (ebrcc_rccodes[get_arc64_condition_code (x)], file); + break; + + case 'C': + if (GET_CODE (x) != SYMBOL_REF + && GET_CODE (x) != LABEL_REF) + { + output_operand_lossage ("invalid operand for %%C code"); + return; + } + output_addr_const (asm_out_file, x); + /* N.B. The instruction is valid, hence any symbol which its + type is LPIC is valid for instruction, see + arc64_is_long_call_p. */ + switch (arc64_get_symbol_type (x)) + { + case ARC64_PIC: + fputs ("@plt", file); + break; + case ARC64_LPIC: + fputs ("@plt34", file); + break; + default: + break; + } + break; + + case 'P': + if (GET_CODE (x) != SYMBOL_REF + && GET_CODE (x) != LABEL_REF) + { + output_operand_lossage ("invalid operand for %%P code"); + return; + } + if (arc64_use_plt34_p (x)) + fputs ("_s", file); + break; + + case 's': + if (REG_P (x)) + break; + if (!CONST_INT_P (x)) + { + output_operand_lossage ("invalid operand for %%s code"); + return; + } + ival = INTVAL (x); + if ((ival & 0x07) == 0) + scalled = 3; + else if ((ival & 0x03) == 0) + scalled = 2; + else if ((ival & 0x01) == 0) + scalled = 1; + + if (scalled) + asm_fprintf (file, "%d", scalled); + break; + + case 'N': + if (REG_P (x)) + { + output_operand_lossage ("invalid operand for %%N code"); + return; + } + sign = -1; + /* fall through */ + case 'S': + if (REG_P (x)) + { + asm_fprintf (file, "%s", reg_names [REGNO (x)]); + return; + } + if (!CONST_INT_P (x)) + { + output_operand_lossage ("invalid operand for %%N or %%S code"); + return; + } + ival = sign * INTVAL (x); + if ((ival & 0x07) == 0) + scalled = 3; + else if ((ival & 0x03) == 0) + scalled = 2; + else if ((ival & 0x01) == 0) + scalled = 1; + + asm_fprintf (file, "%wd", (ival >> scalled)); + break; + + case 'A': + if (!ARC64_HAS_ATOMIC_3) + return; + if (!CONST_INT_P (x)) + { + output_operand_lossage ("invalid operand for %%A"); + return; + } + ival = INTVAL (x); + switch ((enum memmodel) ival) + { + case MEMMODEL_ACQ_REL: + fputs (".aq.rl", file); + break; + + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + case MEMMODEL_ACQUIRE: + case MEMMODEL_CONSUME: + case MEMMODEL_SYNC_ACQUIRE: + fputs (".aq", file); + break; + + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + fputs (".rl", file); + break; + + case MEMMODEL_RELAXED: + break; + + default: + gcc_unreachable (); + } + break; + + case 0: + if (x == NULL) + { + output_operand_lossage ("missing operand"); + return; + } + + switch (GET_CODE (x)) + { + case REG : + asm_fprintf (file, "%s", reg_names [REGNO (x)]); + break; + + case MEM : + fputc ('[', file); + output_address (GET_MODE (x), XEXP (x, 0)); + fputc (']', file); + break; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + case UNSPEC: + output_addr_const (asm_out_file, x); + break; + + case CONST_DOUBLE: + { + long l; + int msize; + machine_mode mode = GET_MODE (x); + /* Maybe I need to define TARGET_SUPPORTS_WIDE_INT. 
*/ + gcc_assert (mode != VOIDmode); + /* GET_MODE_BITSIZE BITS_PER_WORD */ + msize = GET_MODE_SIZE (mode); + if (msize > UNITS_PER_LIMM) + msize = UNITS_PER_LIMM; + msize *= 8; + l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), + float_mode_for_size (msize).require ()); + asm_fprintf (file, "0x%08lx", l); + break; + } + case CONST_INT: + asm_fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + default: + output_operand_lossage ("invalid operand"); + return; + } + break; + + default: + output_operand_lossage ("invalid operand prefix '%%%c'", code); + } +} + +/* Print address 'addr' of a memory access with mode 'mode'. */ + +static void +arc64_print_operand_address (FILE *file , machine_mode mode, rtx addr) +{ + rtx base, index = 0; + machine_mode effective_mode = mode; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + + case CONST: + output_address (mode, XEXP (addr, 0)); + break; + + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == MULT) + index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1); + else if (CONST_INT_P (XEXP (addr, 0))) + index = XEXP (addr, 0), base = XEXP (addr, 1); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + + gcc_assert (OBJECT_P (base)); + effective_mode = + arc64_get_effective_mode_for_address_scaling (mode); + if (REG_P (base) + && scalled_p + && CONST_INT_P (index) + && ARC64_CHECK_SCALLED_IMMEDIATE (index, effective_mode)) + { + index = GEN_INT (INTVAL (index) >> + ARC64LOG2 (GET_MODE_SIZE (effective_mode))); + } + scalled_p = false; + + arc64_print_operand_address (file, mode, base); + if (CONSTANT_P (base) && CONST_INT_P (index)) + fputc ('+', file); + else + fputc (',', file); + gcc_assert (OBJECT_P (index)); + arc64_print_operand_address (file, mode, index); + break; + + case PRE_INC: + case POST_INC: + output_address (VOIDmode, + plus_constant (Pmode, XEXP (addr, 0), + GET_MODE_SIZE (mode))); + break; + + case PRE_DEC: + case POST_DEC: + output_address (VOIDmode, + plus_constant (Pmode, XEXP (addr, 0), + -GET_MODE_SIZE (mode))); + break; + + case PRE_MODIFY: + case POST_MODIFY: + output_address (VOIDmode, XEXP (addr, 1)); + break; + + case LO_SUM: + /* This type of address can be only accepted by LD instructions. */ + base = XEXP (addr, 0); + index = XEXP (addr, 1); + arc64_print_operand_address (file, mode, base); + fputc (',', file); + output_addr_const (file, index); + break; + + case UNSPEC: + /* Small PIC. */ + fputs ("pcl,", file); + output_addr_const (file, addr); + break; + + case LABEL_REF: + case SYMBOL_REF: + case CONST_INT: + output_addr_const (file, addr); + break; + + default: + gcc_unreachable (); + break; + } +} + +/* Target hook for indicating whether a punctuation character for + TARGET_PRINT_OPERAND is valid. */ + +static bool +arc64_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '?' || code == '*'); +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
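   In practice this is what turns the UNSPEC-wrapped symbols used by this
   backend into assembler relocation suffixes, so that, for instance, a
   pc-relative reference is printed as `foo@pcl' and a GOT reference as
   `foo@gotpc' (FOO being just a placeholder symbol here).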
*/ + +static bool +arc64_output_addr_const_extra (FILE *file, rtx x) +{ + rtx base, offset = NULL_RTX; + + if (GET_CODE (x) == UNSPEC) + { + base = XVECEXP (x, 0, 0); + if (GET_CODE (base) == CONST + && GET_CODE (XEXP (base, 0)) == PLUS) + { + offset = XEXP (XEXP (base, 0), 1); + base = XEXP (XEXP (base, 0), 0); + } + output_addr_const (file, base); + switch (XINT (x, 1)) + { + case ARC64_UNSPEC_PCREL: + fputs ("@pcl", file); + break; + + case ARC64_UNSPEC_GOT32: + case ARC64_UNSPEC_GOT: + fputs ("@gotpc", file); + break; + + case ARC64_UNSPEC_TLS_GD: + fputs ("@tlsgd", file); + break; + + case ARC64_UNSPEC_TLS_IE: + fputs ("@tlsie", file); + break; + + case ARC64_UNSPEC_TLS_OFF: + fputs ("@tpoff", file); + break; + + default: + gcc_unreachable (); + } + + if (offset != NULL_RTX) + { + fputs ("+", file); + output_addr_const (file, offset); + } + return true; + } + + return false; +} + +/* Wrap X in an unspec of kind KIND. */ + +static rtx +gen_sym_unspec (rtx x, int kind) +{ + return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), kind); +} + +/* The __tls_get_attr symbol. */ +static GTY(()) rtx arc_tls_symbol; + +/* Emit a call to __tls_get_addr. TI is the argument to this function. + RET is an RTX for the return value location. The entire insn sequence + is returned. */ + +static void +arc64_tls_call (rtx dest, rtx arg) +{ + rtx argreg = gen_reg_rtx (Pmode); + if (!arc_tls_symbol) + arc_tls_symbol = init_one_libfunc ("__tls_get_addr"); + + df_set_regs_ever_live (BLINK_REGNUM, true); + emit_insn (gen_rtx_SET (argreg, arg)); + emit_library_call_value (arc_tls_symbol, dest, LCT_CONST, Pmode, + argreg, Pmode); +} + +/* Handle LARGE memory model for RTX. */ + +static rtx +arc64_large_address (rtx base, rtx scratch) +{ + if (!TARGET_64BIT) + return base; + + emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, base))); + return gen_rtx_LO_SUM (Pmode, scratch, copy_rtx (base)); +} + +/* Create a legitimate mov instruction for the given BASE (unspec). */ + +static rtx +arc64_legit_unspec (rtx base) +{ + rtx t1, ret; + gcc_assert (can_create_pseudo_p ()); + + switch (arc64_cmodel_var) + { + case ARC64_CMODEL_SMALL: + case ARC64_CMODEL_MEDIUM: + return base; + + case ARC64_CMODEL_LARGE: + ret = gen_reg_rtx (Pmode); + t1 = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (ret, arc64_large_address (base, t1))); + return ret; + + default: + break; + } + gcc_unreachable (); +} + +/* Return a legitimized TLS address to access ADDR, which is a + SYMBOL_REF. */ + +static rtx +arc64_legitimize_tls_address (rtx addr) +{ + rtx t1, t2; + rtx base; + enum tls_model model = SYMBOL_REF_TLS_MODEL (addr); + + gcc_assert (can_create_pseudo_p ()); + + switch (model) + { + case TLS_MODEL_LOCAL_DYNAMIC: + case TLS_MODEL_GLOBAL_DYNAMIC: + /* Gen: + addl r0,pcl,@ADDR@tlsgd + bl __tls_get_addr@plt */ + t2 = gen_reg_rtx (Pmode); + base = gen_sym_unspec (addr, ARC64_UNSPEC_TLS_GD); + t1 = arc64_legit_unspec (base); + arc64_tls_call (t2, t1); + return t2; + + case TLS_MODEL_INITIAL_EXEC: + /* Gen: + ldl rx,[pcl,@ADDR@tlsie] + addl rx,rx,r30 */ + addr = arc64_legit_unspec (gen_sym_unspec (addr, ARC64_UNSPEC_TLS_IE)); + addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr)); + return gen_rtx_PLUS (Pmode, addr, gen_rtx_REG (Pmode, R30_REGNUM)); + + case TLS_MODEL_LOCAL_EXEC: + /* Gen: + addl rx,r30,@ADDR@tpoff */ + addr = arc64_legit_unspec (gen_sym_unspec (addr, ARC64_UNSPEC_TLS_OFF)); + return gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, R30_REGNUM), addr); + + default: + gcc_unreachable (); + } +} + +/* Helper function. 
Returns a valid ARC64 RTX that represents the + argument X which is an invalid address RTX. The argument SCRATCH + may be used as a temp when building affresses. */ + +static rtx +arc64_legitimize_address_1 (rtx x, rtx scratch) +{ + rtx base, addend, t1; + bool is_local = true, ATTRIBUTE_UNUSED is_weak = false; + + switch (GET_CODE (x)) + { + case SYMBOL_REF: + is_local = SYMBOL_REF_DECL (x) + ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) + : SYMBOL_REF_LOCAL_P (x); + is_weak = SYMBOL_REF_WEAK (x); + if (SYMBOL_REF_TLS_MODEL (x)) + return arc64_legitimize_tls_address (x); + /* FALLTHRU */ + + case LABEL_REF: + t1 = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : scratch; + gcc_assert (t1); + if (!flag_pic) + { + switch (arc64_cmodel_var) + { + case ARC64_CMODEL_SMALL: + case ARC64_CMODEL_MEDIUM: + return x; + default: + return arc64_large_address (x, t1); + } + } + else if (is_local) + { + /* Local symbol, we can access it using a simple + PCL-relative access. */ + base = gen_sym_unspec (x, ARC64_UNSPEC_PCREL); + return base; + } + else if (flag_pic) + { + /* Global symbol, we access it via a load from the GOT + (small model). I.e., load pointer address via GOT, do + the access of the datum using the loaded pointer. */ + /* FIXME! to enable LARGE/small pic models make the above + condition flag_pic == 1. */ + base = gen_sym_unspec (x, ARC64_UNSPEC_GOT32); + return gen_const_mem (Pmode, base); + } + else + { + /* Global symbol, we access it via a load from the GOT + (LARGE model). */ + base = gen_sym_unspec (x, ARC64_UNSPEC_GOT); + t1 = arc64_large_address (base, t1); + return gen_const_mem (Pmode, t1); + } + + case LO_SUM: + return x; + + case CONST: + /* We expect something like: const (plus (symbol_ref) (const_int)) + A c-function which will generate this should be: + int a; + void b (void) { a = "" ? "" + 8 : 3; } + */ + gcc_assert (can_create_pseudo_p ()); + split_const (x, &base, &addend); + base = force_reg (Pmode, base); + if (addend == const0_rtx) + return base; + return gen_rtx_PLUS (Pmode, base, addend); + + default: + break; + } + + gcc_unreachable (); +} + + +/* Nested function support. */ + +/* Output assembler code for a block containing the constant parts of + a trampoline, leaving space for variable parts. */ + +static void +arc64_asm_trampoline_template (FILE *f) +{ + if (!TARGET_64BIT) /* ARC32 */ + { + /* ld_s r12,[pcl,8] + ld r11,[pcl,12] + j_s [r12] */ + asm_fprintf (f, "\tld_s\t%s,[pcl,8]\n", reg_names[R12_REGNUM]); + asm_fprintf (f, "\tld\t%s,[pcl,12]\n", reg_names[STATIC_CHAIN_REGNUM]); + asm_fprintf (f, "\tj_s\t[%s]\n", reg_names[R12_REGNUM]); + } + else /* TARGET_64BIT */ + { + /* nop + ldl r12,[pcl,12] + ldl r11,[pcl,16] + j [r12] */ + asm_fprintf (f, "\tnop\n"); + asm_fprintf (f, "\tldl\t%s,[pcl,12]\n", reg_names[R12_REGNUM]); + asm_fprintf (f, "\tldl\t%s,[pcl,16]\n", reg_names[STATIC_CHAIN_REGNUM]); + asm_fprintf (f, "\tj\t[%s]\n", reg_names[R12_REGNUM]); + } + /* .(x)word function's address + .(x)word static chain value */ + assemble_aligned_integer (POINTER_BYTES, const0_rtx); + assemble_aligned_integer (POINTER_BYTES, const0_rtx); +} + +/* Helper initialize trampoline. 
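   The template above is only instantiated when the address of a nested
   function escapes; a minimal GNU C sketch of the situation is

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;
       return fp (1);
     }

   INNER reaches OUTER's frame through the static chain (r11), and taking
   its address forces a trampoline onto the stack: the routine below
   copies the template there and patches the two trailing pointer slots
   with INNER's address and the static chain value.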
*/ + +static void +arc64_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + const int fnaddr_offset = TRAMPOLINE_CODE_SIZE; + const int cxt_offset = TRAMPOLINE_CODE_SIZE + POINTER_BYTES; + + emit_block_move (tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + emit_move_insn (adjust_address (tramp, Pmode, fnaddr_offset), fnaddr); + emit_move_insn (adjust_address (tramp, Pmode, cxt_offset), cxt); + /* FIXME: maybe it's good to use "maybe_emit_call_builtin___clear_cache" */ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), + LCT_NORMAL, VOIDmode, XEXP (tramp, 0), Pmode, + plus_constant (Pmode, XEXP (tramp, 0), TRAMPOLINE_SIZE), + Pmode); +} + +/* Implement FUNCTION_OK_FOR_SIBCALL hook. */ + +static bool +arc64_function_ok_for_sibcall (tree decl, + tree exp ATTRIBUTE_UNUSED) +{ + /* Don't use sibcall for naked functions. */ + if (ARC_NAKED_P (cfun->machine->fn_type)) + return false; + + /* Don't use sibcall for ISR functions. */ + if (ARC_INTERRUPT_P (cfun->machine->fn_type)) + return false; + + if (decl && targetm.binds_local_p (decl)) + return true; + + /* We don't have an instruction to do what bl_s sym@plt34 does. */ + if (flag_pic == 2) + return false; + + return true; +} + +/* Implement INIT_LIBFUNCS hook. */ + +static void +arc64_init_libfuncs (void) +{ + set_optab_libfunc (ffs_optab, SImode, "__ffssi2"); + set_optab_libfunc (clz_optab, SImode, "__clzsi2"); + set_optab_libfunc (ctz_optab, SImode, "__ctzsi2"); + set_optab_libfunc (popcount_optab, SImode, "__popcountsi2"); + set_optab_libfunc (parity_optab, SImode, "__paritysi2"); +} + +/* Helper evp_dump_stack_info. */ + +static void +arc64_print_format_registers(FILE *stream, + unsigned regno, + enum machine_mode mode) +{ + unsigned int j, nregs; + unsigned int ll = 0; + + nregs = arc64_hard_regno_nregs (regno, mode); + /* Make sure BLKmode has a number of regs attached. */ + nregs = nregs ? nregs : 2; + for (j = regno + nregs; j > regno; j--) + { + asm_fprintf (stream,"%s", reg_names[j - 1]); + ll += strlen (reg_names[j - 1]); + } + asm_fprintf (stream,"`"); + for (j = ll; j < 20; j++) + asm_fprintf (stream, " "); + + asm_fprintf (stream,"\t(%d)\n", + GET_MODE_SIZE (mode)); +} + +/* Place some comment into assembler stream describing the current + function. 
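   For a small function the annotation comes out roughly as follows (the
   numbers are made up for the example):

     # args = 0, pretend = 0, frame = 16
     # frame_needed = 0, uses_anonymous_args = 0
     # size = 24 bytes
     # Parameters:

   followed by one line per incoming parameter describing the register or
   stack slot it arrives in.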
*/ + +static void +arc64_output_function_prologue (FILE *f) +{ + int regno, i; + struct arc64_frame *frame = &cfun->machine->frame; + tree parm = DECL_ARGUMENTS (current_function_decl); + + asm_fprintf (f, "\t# args = %wd, pretend = %ld, frame = %wd\n", + (HOST_WIDE_INT) crtl->args.size, + frame->saved_varargs_size, + (HOST_WIDE_INT) get_frame_size ()); + asm_fprintf (f, "\t# frame_needed = %d, uses_anonymous_args = %d\n", + frame_pointer_needed, + cfun->machine->uses_anonymous_args); + asm_fprintf (f, "\t# size = %wd bytes\n", + frame->frame_size); + asm_fprintf (f, "\t# + outargs = %wd bytes\n", + frame->saved_outargs_size); + asm_fprintf (f, "\t# + locals = %wd bytes\n", + frame->saved_locals_size); + asm_fprintf (f, "\t# + regs = %wd bytes\n", + frame->saved_regs_size); + asm_fprintf (f, "\t# + varargs = %wd bytes\n", + frame->saved_varargs_size); + + if (crtl->calls_eh_return) + asm_fprintf (f, "\t# Calls __builtin_eh_return.\n"); + + for (regno = R0_REGNUM; regno <= F31_REGNUM; regno++) + if (frame->reg_offset[regno] != -1) + asm_fprintf (f, "\t# regsave[%s] => %ld\n", reg_names[regno], + frame->reg_offset[regno]); + + asm_fprintf(f, "\t# Parameters:\n"); + while (parm) + { + rtx rtl = DECL_INCOMING_RTL (parm); + if (rtl) + { + asm_fprintf(f,"\t# "); + tree decl_name; + decl_name = DECL_NAME (parm); + if (decl_name != NULL && IDENTIFIER_POINTER (decl_name) != NULL) + { + const char *name = lang_hooks.dwarf_name (parm, 0); + if(name) + asm_fprintf(f, "%-20.20s =`", name); + else + asm_fprintf(f, "N.A.`"); + } + if (REG_P (rtl)) + { + unsigned regno = REGNO (rtl); + enum machine_mode mode = GET_MODE (rtl); + arc64_print_format_registers (f, regno, mode); + } + else if (MEM_P (rtl)) + { + rtx addr = XEXP (rtl, 0); + long argPtrOfs = frame->frame_size - + arc64_initial_elimination_offset (ARG_POINTER_REGNUM, + (frame_pointer_needed ? + HARD_FRAME_POINTER_REGNUM : + STACK_POINTER_REGNUM)); + if (GET_CODE (addr) == PLUS) + { + rtx ofs = XEXP (addr, 1); + gcc_assert (CONST_INT_P (ofs)); + argPtrOfs += INTVAL (ofs); + } + asm_fprintf (f, "%s[%4ld]` (%d)\n", + (frame_pointer_needed ? "fp" : "sp"), + argPtrOfs, + GET_MODE_SIZE (GET_MODE (rtl))); + } + else if (GET_CODE (rtl) == PARALLEL) + { + asm_fprintf (f,"xvec` (%d)\n", + GET_MODE_SIZE (GET_MODE (rtl))); + for (i = 0; i < XVECLEN (rtl, 0); i++) + { + rtx xv = XEXP (XVECEXP (rtl, 0, i), 0); + if (REG_P (xv)) + { + unsigned regno = REGNO (xv); + enum machine_mode mode = GET_MODE (xv); + asm_fprintf (f,"# `"); + arc64_print_format_registers (f, regno, mode); + } + } + } + else if (GET_CODE (rtl) == CONCAT) + { + rtx op0 = XEXP (rtl, 0); + rtx op1 = XEXP (rtl, 1); + if (REG_P (op0)) + arc64_print_format_registers (f, REGNO (op0), GET_MODE (op0)); + else + asm_fprintf(f, "MEM`\n"); + asm_fprintf(f,"\t#\t\t\t+`"); + if (REG_P (op1)) + arc64_print_format_registers (f, REGNO (op1), GET_MODE (op1)); + else + asm_fprintf(f, "MEM`\n"); + } + else + { + asm_fprintf(f,"N.A.`\n"); + } + } + parm = TREE_CHAIN (parm); + } +} + +/* Helper for INSN_COST. + + Per Segher Boessenkool: rtx_costs computes the cost for any rtx (an + insn, a set, a set source, any random piece of one). set_src_cost, + set_rtx_cost, etc. are helper functions that use that. + + Those functions do not work for parallels. Also, costs are not + additive like this simplified model assumes. Also, more complex + backends tend to miss many cases in their rtx_costs function. + + Many passes that want costs want to know the cost of a full insn. Like + combine. 
That's why I created insn_cost: it solves all of the above + problems. */ + +static int +arc64_insn_cost (rtx_insn *insn, bool speed) +{ + int cost; + + /* Needed for ifcvt. */ + if (GET_CODE (PATTERN (insn)) == USE) + return 1; + + if (recog_memoized (insn) < 0) + return 0; + + + /* Use cost if provided. */ + cost = get_attr_cost (insn); + if (cost > 0) + return cost; + + cost = pattern_cost (PATTERN (insn), speed); + return cost; +#if 0 + /* If optimizing for size, we want the insn size. */ + if (!speed) + return get_attr_length (insn); + + /* Use cost if provided. */ + cost = get_attr_cost (insn); + if (cost > 0) + return cost; + + /* For speed make a simple cost model: memory access is more + expensive than any other instruction. */ + enum attr_type type = get_attr_type (insn); + + switch (type) + { + case TYPE_LD: + case TYPE_ST: + cost = COSTS_N_INSNS (2); + break; + + default: + cost = COSTS_N_INSNS (1); + break; + } + + return cost; +#endif +} + +/* Helper for arc64_short_access_p. */ + +static bool +check_short_insn_register_p (rtx op, bool hclass_p) +{ + if (!REG_P (op)) + return false; + + return (REGNO (op) >= FIRST_PSEUDO_REGISTER + || COMPACT_REG_P (REGNO (op)) + || (hclass_p && (REGNO (op) <= R30_REGNUM))); +} + +/* Helper for arc64_short_access_p. */ + +static bool +check_short_insn_constant_p (rtx op, machine_mode mode) +{ + HOST_WIDE_INT ival; + + if (!CONST_INT_P (op)) + return false; + + ival = INTVAL (op); + + /* Check u5, u6, u7 short immediates. */ + if (VERIFY_SHIFT (ival, ARC64LOG2 (GET_MODE_SIZE (mode))) + && UNSIGNED_INT5 (ival >> ARC64LOG2 (GET_MODE_SIZE (mode)))) + return true; + + return false; +} + +/* Output code to add DELTA to the first argument, and then jump to + FUNCTION. Used for C++ multiple inheritance. */ + +static void +arc64_output_mi_thunk (FILE *file, + tree thunk_fndecl, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); + rtx this_rtx, fnaddr, temp1; + rtx_insn *insn; + + /* Pretend to be a post-reload pass while generating rtl. */ + reload_completed = 1; + + /* Mark the end of the (empty) prologue. */ + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Determine if we can use a sibcall to call FUNCTION directly. */ + fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0)); + + /* We need one temporary register in some cases. */ + temp1 = gen_rtx_REG (Pmode, R12_REGNUM); + + /* Find out which register contains the "this" pointer. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, R1_REGNUM); + else + this_rtx = gen_rtx_REG (Pmode, R0_REGNUM); + + /* Add DELTA to THIS_RTX. */ + if (delta != 0) + { + rtx offset = GEN_INT (delta); + /* FIXME! check if delta fits in 32bit immediate. Also we can + switch from an ADD to a SUB instruction. */ + gcc_assert (UNSIGNED_INT32 (delta) || SIGNED_INT32 (delta)); + emit_insn (gen_rtx_SET (this_rtx, + gen_rtx_PLUS (Pmode, this_rtx, offset))); + } + + if (vcall_offset != 0) + { + rtx addr; + + /* Set TEMP1 to *THIS_RTX. */ + emit_insn (gen_rtx_SET (temp1, gen_rtx_MEM (Pmode, this_rtx))); + + /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */ + /* FIXME! check if vcall_offset fits in 32bit immediate. */ + gcc_assert (UNSIGNED_INT32 (vcall_offset) || SIGNED_INT32 (vcall_offset)); + addr = plus_constant (Pmode, temp1, vcall_offset); + + /* Load the offset and add it to THIS_RTX. 
*/ + emit_insn (gen_rtx_SET (temp1, gen_rtx_MEM (Pmode, addr))); + emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1)); + } + + /* Jump to the target function. */ + insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + + /* Run just enough of rest_of_compilation. This sequence was + "borrowed" from alpha.c. */ + insn = get_insns (); + split_all_insns_noflow (); + shorten_branches (insn); + assemble_start_function (thunk_fndecl, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + assemble_end_function (thunk_fndecl, fnname); + + /* Stop pretending to be a post-reload pass. */ + reload_completed = 0; +} + +/* Helper INIT_EXPANDERS. */ + +static struct machine_function * +arc64_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_cleared_alloc (); + return machine; +} + +static tree +arc64_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (id < ARC64_BUILTIN_COUNT) + return arc_bdesc[id].fndecl; + + return error_mark_node; +} + +/* Transform UP into lowercase and write the result to LO. + You must provide enough space for LO. Return LO. */ + +static char* +arc64_tolower (char *lo, const char *up) +{ + char *lo0 = lo; + + for (; *up; up++, lo++) + *lo = TOLOWER (*up); + + *lo = '\0'; + + return lo0; +} + +/* Helper for adding the builtins. */ +static void +arc64_init_builtins (void) +{ + tree void_ftype_usint_usint + = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, NULL_TREE); + tree usint_ftype_usint + = build_function_type_list (long_unsigned_type_node, + unsigned_type_node, NULL_TREE); + tree void_ftype_void + = build_function_type_list (void_type_node, NULL_TREE); + tree void_ftype_usint + = build_function_type_list (void_type_node, unsigned_type_node, + NULL_TREE); + tree long_ftype_long + = build_function_type_list (long_long_integer_type_node, + long_long_integer_type_node, NULL_TREE); + + tree void_ftype_long_long + = build_function_type_list (void_type_node, long_long_integer_type_node, + long_long_integer_type_node, NULL_TREE); + + /* Add the builtins. */ +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ + { \ + int id = ARC64_BUILTIN_ ## NAME; \ + const char *Name = "__builtin_arc_" #NAME; \ + char *name = (char*) alloca (1 + strlen (Name)); \ + \ + gcc_assert (id < ARC64_BUILTIN_COUNT); \ + if (MASK) \ + arc_bdesc[id].fndecl \ + = add_builtin_function (arc64_tolower(name, Name), TYPE, id, \ + BUILT_IN_MD, NULL, NULL_TREE); \ + } +#include "builtins.def" +#undef DEF_BUILTIN +} + +/* Helper arc_expand_builtin, generates a pattern for the given icode + and arguments. */ + +static rtx_insn * +apply_GEN_FCN (enum insn_code icode, rtx *arg) +{ + switch (insn_data[icode].n_generator_args) + { + case 0: + return GEN_FCN (icode) (); + case 1: + return GEN_FCN (icode) (arg[0]); + case 2: + return GEN_FCN (icode) (arg[0], arg[1]); + case 3: + return GEN_FCN (icode) (arg[0], arg[1], arg[2]); + case 4: + return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]); + case 5: + return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]); + default: + gcc_unreachable (); + } +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
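   Assuming the NOP, BRK and TRAP_S entries from builtins.def, user code
   such as

     __builtin_arc_nop ();
     __builtin_arc_brk ();
     __builtin_arc_trap_s (0);

   is routed through here; all three are intercepted as special builtins
   in the first part of the function, while any other builtin falls
   through to the generic operand-expansion loop.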
*/ + +static rtx +arc64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int id = DECL_FUNCTION_CODE (fndecl); + const struct arc64_builtin_description *d = &arc_bdesc[id]; + int i, j, n_args = call_expr_nargs (exp); + rtx pat = NULL_RTX; + rtx xop[5]; + enum insn_code icode = d->icode; + machine_mode tmode = insn_data[icode].operand[0].mode; + int nonvoid; + tree arg0; + rtx op0; + + if (id >= ARC64_BUILTIN_COUNT) + internal_error ("bad builtin fcode"); + + /* 1st part: Expand special builtins. */ + switch (id) + { + case ARC64_BUILTIN_NOP: + emit_insn (gen_nopv ()); + return NULL_RTX; + + case ARC64_BUILTIN_BRK: + gcc_assert (icode != 0); + emit_insn (GEN_FCN (icode) (const1_rtx)); + return NULL_RTX; + + case ARC64_BUILTIN_TRAP_S: + arg0 = CALL_EXPR_ARG (exp, 0); + fold (arg0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + gcc_assert (icode != 0); + emit_insn (GEN_FCN (icode) (op0)); + return NULL_RTX; + default: + break; + } + + /* 2nd part: Expand regular builtins. */ + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + j = 0; + + if (nonvoid) + { + if (target == NULL_RTX + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + { + target = gen_reg_rtx (tmode); + } + xop[j++] = target; + } + + gcc_assert (n_args <= 4); + for (i = 0; i < n_args; i++, j++) + { + tree arg = CALL_EXPR_ARG (exp, i); + machine_mode mode = insn_data[icode].operand[j].mode; + rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL); + machine_mode opmode = GET_MODE (op); + + if (CONST_INT_P (op)) + opmode = mode; + + if ((opmode == SImode) && (mode == HImode)) + { + opmode = HImode; + op = gen_lowpart (HImode, op); + } + + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert (opmode == mode || opmode == VOIDmode); + + if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode)) + op = copy_to_mode_reg (mode, op); + + xop[j] = op; + } + + pat = apply_GEN_FCN (icode, xop); + if (pat == NULL_RTX) + return NULL_RTX; + + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the loop_end pattern into + a subtract and a test. */ + +static void +hwloop_fail (hwloop_info loop) +{ + rtx test; + rtx insn; + + if (TARGET_64BIT) + emit_insn_before (gen_adddi_cmp0 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + else + emit_insn_before (gen_addsi_cmp0 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REGNUM), const0_rtx); + test = gen_rtx_IF_THEN_ELSE (VOIDmode, test, + gen_rtx_LABEL_REF (Pmode, loop->start_label), + pc_rtx); + insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +/* Optimize LOOP. We just are checking that the loop isn't too long, + returns true if so. Return true if successful, false if the loop + should be marked bad. If it returns false, the FAIL function is + called. 
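   A typical candidate is a counted loop whose back edge has been
   rewritten to a decrement-and-branch counter, e.g. something like

     void scale (int *p, int n)
     {
       for (int i = n; i != 0; i--)
         *p++ <<= 1;
     }

   If the body fits the length limits checked here, the back edge stays a
   single DBNZ on the iteration register; otherwise hwloop_fail above
   splits it into an explicit flag-setting decrement plus a conditional
   branch.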
*/ + +static bool +hwloop_optimize (hwloop_info loop) +{ + unsigned int length; + + /* Call shorten_branches to calculate the insn lengths. */ + shorten_branches (get_insns()); + + if (!INSN_ADDRESSES_SET_P ()) + { + fprintf (dump_file, ";; loop %d has an unknown length\n", loop->loop_no); + return false; + } + + length = INSN_ADDRESSES (INSN_UID (loop->loop_end)) + - INSN_ADDRESSES (INSN_UID (loop->start_label)); + loop->length = length; + if (dump_file) + fprintf (dump_file, ";; loop %d with length %d\n", loop->loop_no, + loop->length); + if (loop->length > MAX_LOOP_LENGTH + || loop->length < MIN_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is too long\n", loop->loop_no); + return false; + } + if (loop->length == 0) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is empty\n", loop->loop_no); + return false; + } + + return true; +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ + +static rtx +hwloop_pattern_reg (rtx_insn *insn) +{ + rtx reg; + + if (!JUMP_P (insn) + || (TARGET_64BIT && (recog_memoized (insn) != CODE_FOR_dbnzdi)) + || (!TARGET_64BIT && (recog_memoized (insn) != CODE_FOR_dbnzsi))) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +static struct hw_doloop_hooks arc64_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Machine specific reorg step. */ +static void +arc64_reorg (void) +{ + compute_bb_for_insn (); + df_analyze (); + reorg_loops (true, &arc64_doloop_hooks); + + /* Search MAC instructions and remove the super-flu move from + accumulator to a register. Hence, we try to repair what we do in + madd expands or in mac* splits. */ + for (rtx_insn *insn = get_insns (); insn; insn = next_real_insn (insn)) + { + rtx op0, op1, op2, tmp; + enum insn_code icode = CODE_FOR_nothing; + machine_mode mode = E_VOIDmode; + + if (!INSN_P (insn)) + continue; + + /* 1st find the MAC instruction with null (accumulator) + output. */ + switch (INSN_CODE (insn)) + { + case CODE_FOR_umachi0: + icode = CODE_FOR_umachi; + mode = E_SImode; + break; + + case CODE_FOR_machi0: + icode = CODE_FOR_machi; + mode = E_SImode; + break; + + case CODE_FOR_umacd0: + icode = CODE_FOR_umacd; + mode = E_DImode; + break; + + case CODE_FOR_macd0: + icode = CODE_FOR_macd; + mode = E_DImode; + break; + + case CODE_FOR_macsi0: + icode = CODE_FOR_macsi; + mode = E_SImode; + break; + + case CODE_FOR_dmach0: + icode = CODE_FOR_dmach; + mode = E_HImode; + break; + + default: + continue; + } + + gcc_assert (REGNO (SET_DEST (PATTERN (insn))) == R58_REGNUM); + rtx_insn *nxt = next_real_insn (insn); + + /* 2nd Check if it is a move instruction. */ + tmp = PATTERN (nxt); + if (GET_CODE (tmp) != SET + || (GET_CODE (SET_SRC (tmp)) != REG) + || (GET_CODE (SET_DEST (tmp)) != REG)) + continue; + + op0 = SET_DEST (tmp); + op1 = SET_SRC (tmp); + if (REGNO (op1) != R58_REGNUM) + continue; + + /* Make the new MAC instruction. 
*/ + switch (INSN_CODE (insn)) + { + case CODE_FOR_umachi0: + case CODE_FOR_umacd0: + case CODE_FOR_machi0: + case CODE_FOR_macd0: + if (!TARGET_64BIT && ((REGNO (op0) & 1) != 0)) + continue; + tmp = SET_SRC (PATTERN (insn)); + op1 = XEXP (XEXP (XEXP (tmp, 0), 0), 0); + op2 = XEXP (XEXP (XEXP (tmp, 0), 1), 0); + break; + + case CODE_FOR_dmach0: + case CODE_FOR_macsi0: + tmp = SET_SRC (PATTERN (insn)); + op1 = XEXP (XEXP (tmp, 0), 0); + op2 = XEXP (XEXP (tmp, 0), 1); + break; + + default: + gcc_unreachable (); + } + + emit_insn_before (GEN_FCN (icode) (op0, op1, op2, + gen_rtx_REG (mode, R58_REGNUM)), + insn); + + /* Remove the old MAC and MOV instruction. */ + set_insn_deleted (insn); + set_insn_deleted (nxt); + } +} + +/* Expand a compare and swap pattern. */ + +static void +emit_unlikely_jump (rtx insn) +{ + rtx_insn *jump = emit_jump_insn (insn); + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); +} + +/* Expand code to perform a 8 or 16-bit compare and swap by doing + 32-bit compare and swap on the word containing the byte or + half-word. The difference between a weak and a strong CAS is that + the weak version may simply fail. The strong version relies on two + loops, one checks if the SCOND op is succsfully or not, the other + checks if the 32 bit accessed location which contains the 8 or 16 + bit datum is not changed by other thread. The first loop is + implemented by the atomic_compare_and_swapsdi_1 pattern. The second + loops is implemented by this routine. */ + +static void +arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem, + rtx oldval, rtx newval, rtx weak, + rtx mod_s, rtx mod_f) +{ + rtx addr1 = force_reg (Pmode, XEXP (mem, 0)); + rtx addr = gen_reg_rtx (Pmode); + rtx off = gen_reg_rtx (SImode); + rtx oldv = gen_reg_rtx (SImode); + rtx newv = gen_reg_rtx (SImode); + rtx oldvalue = gen_reg_rtx (SImode); + rtx newvalue = gen_reg_rtx (SImode); + rtx res = gen_reg_rtx (SImode); + rtx resv = gen_reg_rtx (SImode); + rtx memsi, val, mask, end_label, loop_label, cc, x; + machine_mode mode; + bool is_weak = (weak != const0_rtx); + + /* Truncate the address. */ + emit_insn (gen_rtx_SET (addr, + gen_rtx_AND (Pmode, addr1, GEN_INT (-4)))); + + /* Compute the datum offset. */ + + emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, + gen_lowpart(SImode, addr1), + GEN_INT (3)))); + + /* Normal read from truncated address. */ + memsi = gen_rtx_MEM (SImode, addr); + set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); + MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); + + val = copy_to_reg (memsi); + + /* Convert the offset in bits. */ + emit_insn (gen_rtx_SET (off, + gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); + + /* Get the proper mask. */ + if (GET_MODE (mem) == QImode) + mask = force_reg (SImode, GEN_INT (0xff)); + else + mask = force_reg (SImode, GEN_INT (0xffff)); + + emit_insn (gen_rtx_SET (mask, + gen_rtx_ASHIFT (SImode, mask, off))); + + /* Prepare the old and new values. 
*/ + emit_insn (gen_rtx_SET (val, + gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), + val))); + + oldval = gen_lowpart (SImode, oldval); + emit_insn (gen_rtx_SET (oldv, + gen_rtx_ASHIFT (SImode, oldval, off))); + + newval = gen_lowpart_common (SImode, newval); + emit_insn (gen_rtx_SET (newv, + gen_rtx_ASHIFT (SImode, newval, off))); + + emit_insn (gen_rtx_SET (oldv, + gen_rtx_AND (SImode, oldv, mask))); + + emit_insn (gen_rtx_SET (newv, + gen_rtx_AND (SImode, newv, mask))); + + if (!is_weak) + { + end_label = gen_label_rtx (); + loop_label = gen_label_rtx (); + emit_label (loop_label); + } + + /* Make the old and new values. */ + emit_insn (gen_rtx_SET (oldvalue, + gen_rtx_IOR (SImode, oldv, val))); + + emit_insn (gen_rtx_SET (newvalue, + gen_rtx_IOR (SImode, newv, val))); + + /* Try an 32bit atomic compare and swap. It clobbers the CC + register. */ + if (GET_MODE (mem) == SImode) + emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue, + weak, mod_s, mod_f)); + else /* DImode */ + emit_insn (gen_atomic_compare_and_swapdi_1 (res, memsi, oldvalue, newvalue, + weak, mod_s, mod_f)); + + /* Regardless of the weakness of the operation, a proper boolean + result needs to be provided. */ + x = gen_rtx_REG (CC_Zmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (bool_result, x)); + + if (!is_weak) + { + /* Check the results: if the atomic op is successfully the goto + to end label. */ + x = gen_rtx_REG (CC_Zmode, CC_REGNUM); + x = gen_rtx_EQ (VOIDmode, x, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + + /* Wait for the right moment when the accessed 32-bit location + is stable. */ + emit_insn (gen_rtx_SET (resv, + gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), + res))); + mode = SELECT_CC_MODE (NE, resv, val); + cc = gen_rtx_REG (mode, CC_REGNUM); + emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val))); + + /* Set the new value of the 32 bit location, proper masked. */ + emit_insn (gen_rtx_SET (val, resv)); + + /* Try again if location is unstable. Fall through if only + scond op failed. */ + x = gen_rtx_NE (VOIDmode, cc, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + emit_label (end_label); + } + + /* End: proper return the result for the given mode. */ + emit_insn (gen_rtx_SET (res, + gen_rtx_AND (SImode, res, mask))); + + emit_insn (gen_rtx_SET (res, + gen_rtx_LSHIFTRT (SImode, res, off))); + + emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); +} + + +/* This hook may conditionally modify five variables: fixed_regs, + call_used_regs, global_regs, reg_names and reg_class_contents. */ + +static void +arc64_conditional_register_usage (void) +{ + int regno; + + /* When having floating point, we enable the registers to be used by compiler + and set the appropriate call used registers (i.e., f0-f15). */ + if (ARC64_HAS_FP_BASE) + { + for (regno = F0_REGNUM; regno <= F31_REGNUM; regno++) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = (regno < F16_REGNUM) ? 1 : 0; + } + } +} + +/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE + if MODE is HFmode, and punt to the generic implementation otherwise. */ + +static bool +arc64_libgcc_floating_mode_supported_p (scalar_float_mode mode) +{ + return (mode == HFmode + ? 
ARC64_HAS_FPUH + : default_libgcc_floating_mode_supported_p (mode)); +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE + if MODE is HFmode, and punt to the generic implementation otherwise. */ + +static bool +arc64_scalar_mode_supported_p (scalar_mode mode) +{ + return (mode == HFmode + ? ARC64_HAS_FPUH + : default_scalar_mode_supported_p (mode)); +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +arc64_vector_mode_supported_p (machine_mode mode) +{ + switch (mode) + { + /* 32-bit fp SIMD vectors. */ + case E_V2HFmode: + return ARC64_VFP_32; + /* 64-bit fp SIMD vectors. */ + case E_V4HFmode: + case E_V2SFmode: + return ARC64_VFP_64; + /* 128-bit fp SIMD vectors. */ + case E_V8HFmode: + case E_V4SFmode: + case E_V2DFmode: + return ARC64_VFP_128; + + /* 32-bit SIMD vectors. */ + case E_V2HImode: + /* 64-bit SIMD vectors. */ + case E_V4HImode: + case E_V2SImode: + return TARGET_SIMD; + + default: + return false; + } +} + +/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ + +static machine_mode +arc64_preferred_simd_mode (scalar_mode mode) +{ + switch (mode) + { + case E_HFmode: + if (ARC64_VFP_128) + return V8HFmode; + if (ARC64_VFP_64) + return V4HFmode; + if (ARC64_VFP_32) + return V2HFmode; + return word_mode; + + case E_SFmode: + if (ARC64_VFP_128) + return V4SFmode; + if (ARC64_VFP_64) + return V2SFmode; + return word_mode; + + case E_DFmode: + if (ARC64_VFP_128) + return V2DFmode; + return word_mode; + + case E_HImode: + return TARGET_SIMD ? V4HImode : word_mode; + case E_SImode: + return TARGET_SIMD ? V2SImode : word_mode; + + default: + return word_mode; + } +} + +/* Implements target hook + TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */ + +static unsigned int +arc64_autovectorize_vector_modes (vector_modes *modes, bool) +{ + if (ARC64_VFP_128) + { + modes->quick_push (V8HFmode); + modes->quick_push (V4SFmode); + modes->quick_push (V2DFmode); + } + else if (ARC64_VFP_64) + { + modes->quick_push (V4HFmode); + modes->quick_push (V2SFmode); + } + else if (ARC64_VFP_32) + modes->quick_push (V2HFmode); + + if (TARGET_SIMD) + { + modes->quick_push (V4HImode); + modes->quick_push (V2SImode); + } + return 0; +} + +/* Vectorization costs. */ +static int +arc64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return 1; + + case scalar_load: + return 1; + + case scalar_store: + return 1; + + case vector_stmt: + return 1; /* fp operations are more efficient than int. */ + + case vector_load: + return 1; + + case vector_store: + return 1; + + case vec_to_scalar: + return 1; /* We have extract instructions. */ + + case scalar_to_vec: + return 1; /* fp is more efficient than int. */ + + case unaligned_load: + case vector_gather_load: + return 1; /* Maybe I need to reflect unaligned flag here. */ + + case unaligned_store: + case vector_scatter_store: + return 1; /* Likewise. */ + + case cond_branch_taken: + return 3; /* A jump is always expensive. */ + + case cond_branch_not_taken: + return 1; + + case vec_perm: + return 1; /* We don't really have vec_perm. */ + + case vec_promote_demote: + return 1; + + case vec_construct: + elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); + return elements / 2; + + default: + gcc_unreachable (); + } +} + +/* Return a new RTX holding the result of moving POINTER forward by + AMOUNT bytes. 
*/ + +static rtx +arc64_move_pointer (rtx pointer, poly_int64 amount) +{ + rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount); + + return adjust_automodify_address (pointer, GET_MODE (pointer), + next, amount); +} + +/* Return a new RTX holding the result of moving POINTER forward by the + size of the mode it points to. */ + +static rtx +arc64_progress_pointer (rtx pointer) +{ + return arc64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer))); +} + +/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by + MODE bytes. */ + +static void +arc64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, + machine_mode mode) +{ + rtx reg = gen_reg_rtx (mode); + + /* "Cast" the pointers to the correct mode. */ + *src = adjust_address (*src, mode, 0); + *dst = adjust_address (*dst, mode, 0); + /* Emit the memcpy. */ + emit_move_insn (reg, *src); + emit_move_insn (*dst, reg); + /* Move the pointers forward. */ + *src = arc64_progress_pointer (*src); + *dst = arc64_progress_pointer (*dst); +} + +/* Moving f regs to r regs is not a very good idea. */ +static int +arc64_register_move_cost (machine_mode, + reg_class_t from_class, reg_class_t to_class) +{ + if ((from_class == FP_REGS && to_class == GENERAL_REGS) + || (to_class == FP_REGS && from_class == GENERAL_REGS)) + return 200; + return 2; +} + +/* Check/emit vector duplicate instructions. */ + +static bool +arc64_simd_dup (struct e_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + HOST_WIDE_INT elt; + rtx t0, parallel, select; + rtx in0 = d->op0; + rtx out = d->target; + + if (!TARGET_64BIT + || !d->one_vector_p + || vmode == E_V2HImode + || d->perm.encoding ().encoded_nelts () != 1 + || !d->perm[0].is_constant (&elt) + /* elt is zero, then the vec_dup pattern does as good as we do here. */ + || elt == 0) + return false; + + if (d->testing_p) + return true; + + switch (vmode) + { + case E_V8HFmode: + case E_V4HFmode: + case E_V2HFmode: + case E_V2SFmode: + case E_V4SFmode: + if (elt != 0) + { + t0 = gen_reg_rtx (GET_MODE_INNER (vmode)); + parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, GEN_INT (elt))); + select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel); + emit_set_insn (t0, select); + emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, t0)); + return true; + } + + /* FALLTHRU */ + case E_V2DFmode: + case E_V2SImode: + parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, GEN_INT (elt))); + select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel); + emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select)); + return true; + + case E_V4HImode: + if (elt == 0) + { + t0 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in0)); + emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t0)); + return true; + } + else if (elt == 1) + { + t0 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_sel_lane3_1v4hi (t0, in0, in0)); + emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t0)); + return true; + } + else if (elt == 2) + { + t0 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in0)); + emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t0)); + return true; + } + else if (elt == 3) + { + t0 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_sel_lane3_1v4hi (t0, in0, in0)); + emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t0)); + return true; + } + break; + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Recognize VPACK instructions. 
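   For V4HImode, for instance, the even-lane form corresponds to a GNU C
   shuffle along the lines of

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi pack_even (v4hi a, v4hi b)
     {
       const v4hi sel = { 0, 2, 4, 6 };
       return __builtin_shuffle (a, b, sel);
     }

   which keeps lanes 0 and 2 of each input; the odd variant uses
   { 1, 3, 5, 7 } and maps to the companion lane-select instruction.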
*/ + +static bool +arc64_simd_vpack (struct e_vec_perm_d *d) +{ + HOST_WIDE_INT odd; + poly_uint64 nelt = d->perm.length (); + rtx out, in0, in1; + machine_mode vmode = d->vmode; + + if (FLOAT_MODE_P (vmode) + || !d->perm[0].is_constant (&odd) + || (odd != 0 && odd != 1) + || !d->perm.series_p (0, 1, odd, 2) + || !d->perm.series_p (2, 1, nelt + odd, 2)) + return false; + + switch (vmode) + { + case E_V2SImode: + case E_V4HImode: + if (!TARGET_64BIT) + return false; + break; + + case E_V2HImode: + break; + + default: + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + out = d->target; + switch (vmode) + { + case E_V4HImode: + if (odd) + emit_insn (gen_arc64_sel_lane3_1v4hi (out, in0, in1)); + else + emit_insn (gen_arc64_sel_lane2_0v4hi (out, in0, in1)); + break; + + case E_V2SImode: + if (odd) + emit_insn (gen_arc64_sel_lane1_v2si (out, in0, in1)); + else + emit_insn (gen_arc64_sel_lane0_v2si (out, in0, in1)); + break; + + case E_V2HImode: + if (odd) + emit_insn (gen_arc64_sel_lane1_v2hi (out, in0, in1)); + else + emit_insn (gen_arc64_sel_lane0_v2hi (out, in0, in1)); + break; + + default: + gcc_unreachable (); + } + return true; +} + +/* Reverse vector, recognize swapl and vfexch instructions. */ + +static bool +arc64_simd_swapl (struct e_vec_perm_d *d) +{ + poly_uint64 nelt = d->perm.length (); + machine_mode vmode = d->vmode; + rtx t0, t1, t2, out, in0; + rtx src; + unsigned int unspec; + + if (GET_MODE_UNIT_SIZE (vmode) > 4 + || !TARGET_64BIT) + return false; + + if (!d->one_vector_p) + return false; + + if (!d->perm.series_p (0, 1, nelt - 1, -1)) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + out = d->target; + t0 = d->target; + t1 = d->target; + + switch (vmode) + { + case E_V4HImode: + t0 = gen_reg_rtx (vmode); + t1 = gen_reg_rtx (vmode); + t2 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_swapl (t0, in0)); + emit_insn (gen_arc64_swapv4hi (t1, in0)); + emit_insn (gen_arc64_swapv4hi (t2, t0)); + emit_insn (gen_arc64_swp_lane0_v4hi (out, t2, t1)); + break; + + case E_V2SImode: + emit_insn (gen_arc64_swaplv2si (out, in0)); + break; + + case E_V2HImode: + emit_insn (gen_arc64_swapv2hi (out, in0)); + break; + + case E_V8HFmode: + t1 = gen_reg_rtx (vmode); + /* Fall through. */ + case E_V4SFmode: + t0 = gen_reg_rtx (vmode); + /* Fall through. */ + case E_V2DFmode: + unspec = ARC64_UNSPEC_DEXCH; + src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec); + emit_set_insn (t0, src); + if (vmode == E_V2DFmode) + return true; + + unspec = ARC64_UNSPEC_SEXCH; + src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, t0), unspec); + emit_set_insn (t1, src); + if (vmode == E_V4SFmode) + return true; + + unspec = ARC64_UNSPEC_HEXCH; + src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, t1), unspec); + emit_set_insn (out, src); + break; + + case E_V4HFmode: + t1 = gen_reg_rtx (vmode); + /* Fall through. */ + case E_V2SFmode: + unspec = ARC64_UNSPEC_SEXCH; + src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec); + emit_set_insn (t1, src); + if (vmode == E_V2SFmode) + return true; + in0 = t1; + /* Fall through. */ + + case E_V2HFmode: + unspec = ARC64_UNSPEC_HEXCH; + src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec); + emit_set_insn (out, src); + break; + + default: + gcc_unreachable (); + } + return true; +} + +/* Detect cases when we can use swap instruction. 
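   For V4HImode this is the { 1, 0, 3, 2 } permutation, i.e. swapping the
   two halves of every 32-bit pair, as in the GNU C sketch

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi swap_pairs (v4hi a)
     {
       const v4hi sel = { 1, 0, 3, 2 };
       return __builtin_shuffle (a, sel);
     }

   The sequence emitted below builds the result from SWAP/SWAPL
   operations and a final lane merge.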
*/ + +static bool +arc64_simd_swap (struct e_vec_perm_d *d) +{ + rtx t0, t1, t2, out, in0; + machine_mode vmode = d->vmode; + + if (vmode != E_V4HImode + || !TARGET_64BIT) + return false; + + if (!d->one_vector_p) + return false; + + if (!d->perm.series_p (0, 2, 1, 2) + || !d->perm.series_p (1, 2, 0, 2)) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + out = d->target; + + t0 = gen_reg_rtx (vmode); + t1 = gen_reg_rtx (vmode); + t2 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_swapl (t0, in0)); + emit_insn (gen_arc64_swapv4hi (t1, in0)); + emit_insn (gen_arc64_swapv4hi (t2, t0)); + emit_insn (gen_arc64_swp_lane0_v4hi (out, t1, t2)); + return true; +} + +/* Detect cases when we can use vapck2wl for 4xVectors. */ + +static bool +arc64_simd_vpack2wl (struct e_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + + if (vmode != E_V4HImode + || !TARGET_64BIT) + return false; + + if (d->perm[0] != 0 + || d->perm[1] != 1 + || (d->perm[2] != 4 && d->perm[2] != 0) + || (d->perm[3] != 5 && d->perm[3] != 1)) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + emit_insn (gen_arc64_swp_lane0_v4hi (d->target, d->op0, d->op1)); + return true; +} + +static bool +arc64_simd_vpack2wm (struct e_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + + if (vmode != E_V4HImode + || !TARGET_64BIT) + return false; + + if (d->perm[0] != 2 + || d->perm[1] != 3 + || (d->perm[2] != 6 && d->perm[2] != 2) + || (d->perm[3] != 7 && d->perm[3] != 3)) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + emit_insn (gen_arc64_swp_lane1_v4hi (d->target, d->op0, d->op1)); + return true; +} + +/* Recognize patterns for {H,S,D}EXCH insns, which reverse elements: + VFHEXCH (v2hf): h0 h1 + VFHEXCH (v4hf): h2 h3 h0 h1 + VFHEXCH (v8hf): h6 h7 h4 h5 h2 h3 h0 h1 + + VFSEXCH (v4hf): h1h0 h3h2 + VFSEXCH (v8hf): h5h4 h7h6 h1h0 h3h2 + + VFDEXCH (v8hf): h3h2h1h0 h7h6h5h4 + + VFSEXCH (v2sf): s0 s1 + VFSEXCH (v4sf): s2 s3 s0 s1 + + VFDEXCH (v4sf): s1s0 s3s2 + + VFDEXCH (v2df): d0 d1 + */ + +static bool +arc64_simd_exch (struct e_vec_perm_d *d) +{ + HOST_WIDE_INT diff; + unsigned int i, size, unspec; + machine_mode vmode = d->vmode; + + if (!ARC64_HAS_FP_BASE + || !FLOAT_MODE_P (vmode) + || !d->one_vector_p + || !d->perm[0].is_constant (&diff) + || !diff) + return false; + + size = diff * GET_MODE_UNIT_BITSIZE (vmode); + if (size == 64) + { + if (!ARC64_HAS_FPUD) + return false; + unspec = ARC64_UNSPEC_DEXCH; + } + else if (size == 32) + { + unspec = ARC64_UNSPEC_SEXCH; + } + else if (size == 16) + { + unspec = ARC64_UNSPEC_HEXCH; + } + else + return false; + + switch (diff) + { + case 1: + for (i = 0; i < 2; i++) + if (!d->perm.series_p (i, 2, diff - i, 2)) + return false; + break; + + case 2: + case 4: + for (i = 0; i < diff; i++) + if (!d->perm.series_p (i, diff, diff + i, -diff)) + return false; + break; + + default: + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, d->op0), unspec); + emit_set_insn (d->target, src); + return true; +} + +/* Recognize FV
{H,S,D}UNPACKL/FV{H,S,D}
UNPACKM instructions. + + VFHUNPKL (v2hf): Ch0 Bh0 + VFHUNPKL (v4hf): Ch2 Ch0 Bh2 Bh0 + VFHUNPKL (v8hf): Ch6 Ch4 Ch2 Ch0 Bh6 Bh4 Bh2 Bh0 + + VFSUNPKL (v4hf): Ch1Ch0 Bh1Bh0 + VFSUNPKL (v8hf): Ch5Ch4 Ch1Ch0 Bh5Bh4 Bh1Bh0 + + VFDUNPKL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0 + + VFSUNPKL (v2sf): Cs0 Bs0 + VFSUNPKL (v4sf): Cs2 Cs0 Bs2 Bs0 + + VFDUNPKL (v4sf): Cs1Cs0 Bs1Bs0 + + VFDUNPKL (v2df): Cd0 Bd0 + + VFHUNPKM (v2hf): Ch1 Bh1 + VFHUNPKM (v4hf): Ch3 Ch1 Bh3 Bh1 + VFHUNPKM (v8hf): Ch7 Ch5 Ch3 Ch1 Bh7 Bh5 Bh3 Bh1 + + VFSUNPKM (v4hf): Ch3Ch2 Bh3Bh2 + VFSUNPKM (v8hf): Ch7Ch6 Ch3Ch2 Bh7Bh6 Bh3Bh2 + + VFDUNPKM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4 + + VFSUNPKM (v2sf): Cs1 Bs1 + VFSUNPKM (v4sf): Cs3 Cs1 Bs3 Bs1 + + VFDUNPKM (v4sf): Cs3Cs2 Bs3Bs2 + + VFDUNPKM (v2df): Cd1 Bd1 + */ + +static bool +arc64_simd_unpk (struct e_vec_perm_d *d) +{ + HOST_WIDE_INT odd, lo; + poly_uint64 nelt = d->perm.length (); + unsigned int i, j, size, unspec, diff = 0; + machine_mode vmode = d->vmode; + + if (!ARC64_HAS_FP_BASE + || !FLOAT_MODE_P (vmode) + || !d->perm[0].is_constant (&odd) + || (odd == 3) + || (odd < 0 && odd > (HOST_WIDE_INT)(nelt >> 1))) + return false; + + /* If ODD is set, then diff == odd. Thus, the below condition should + hold. */ + lo = (odd == 0) ? 1 : odd; + for (i = 4; (i >= lo) && (diff == 0); i >>= 1) + { + bool found = true; + for (j = 0; (j < i) && found; j++) + if (!d->perm.series_p (j, i, odd + j, i * 2 ) + || !d->perm.series_p ((nelt >> 1) + j, i, nelt + odd + j, i * 2)) + found = false; + if (found) + diff = i; + } + + size = diff * GET_MODE_UNIT_BITSIZE (vmode); + if (size == 64) + { + if (!ARC64_HAS_FPUD) + return false; + unspec = odd ? ARC64_UNSPEC_DUNPKM : ARC64_UNSPEC_DUNPKL; + } + else if (size == 32) + { + unspec = odd ? ARC64_UNSPEC_SUNPKM : ARC64_UNSPEC_SUNPKL; + } + else if (size == 16) + { + unspec = odd ? ARC64_UNSPEC_HUNPKM : ARC64_UNSPEC_HUNPKL; + } + else + return false; + + /* Success! */ + if (d->testing_p) + return true; + + rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec); + emit_set_insn (d->target, src); + return true; +} + +/* Recognize VF
{H,S,D}PACKL and VF{H,S,D}
PACKM instructions. + + VFHPACKL (v2hf): Ch0 Bh0 + VFHPACKL (v4hf): Ch1 Bh1 Ch0 Bh0 + VFHPACKL (v8hf): Ch3 Bh3 Ch2 Bh2 Ch1 Bh1 Ch0 Bh0 + + VFSPACKL (v4hf): Ch1Ch0 Bh1Bh0 + VFSPACKL (v8hf): Ch3Ch2 Bh3Bh2 Ch1Ch0 Bh1Bh0 + + VFDPACKL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0 + + VFSPACKL (v2sf): Cs0 Bs0 + VFSPACKL (v4sf): Cs1 Bs1 Cs0 Bs0 + + VFDPACKL (v4sf): Cs1Cs0 Bs1Bs0 + + VFDPACKL (v2df): Cd0 Bd0 + + + VFHPACKM (v2hf): Ch1 Bh1 + VFHPACKM (v4hf): Ch3 Bh3 Ch2 Bh2 + VFHPACKM (v8hf): Ch7 Bh7 Ch6 Bh6 Ch5 Bh5 Ch4 Bh4 + + VFSPACKM (v4hf): Ch3Ch2 Bh3Bh2 + VFSPACKM (v8hf): Ch7Ch6 Bh7Bh6 Ch5Ch4 Bh5Bh4 + + VFDPACKM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4 + + VFSPACKM (v2sf): Cs1 Bs1 + VFSPACKM (v4sf): Cs3 Bs3 Cs2 Bs2 + + VFDPACKM (v4sf): Cs3Cs2 Bs3Bs2 + + VFDPACKM (v2df): Cd1 Bd1 + */ + +static bool +arc64_simd_pack (struct e_vec_perm_d *d) +{ + HOST_WIDE_INT odd; + poly_uint64 nelt = d->perm.length (); + unsigned int i, j, size, unspec, diff = 0; + machine_mode vmode = d->vmode; + + if (!ARC64_HAS_FP_BASE + || !FLOAT_MODE_P (vmode) + || !d->perm[0].is_constant (&odd) + || (odd != 0 && odd != (HOST_WIDE_INT)(nelt >> 1))) + return false; + + for (i = 4; (i > 0) && (diff == 0); i >>= 1) + { + bool found = true; + for (j = 0; (j < i) && found; j++) + if (!d->perm.series_p (j, 2 * i, odd + j, i) + || !d->perm.series_p (i + j, 2 * i, nelt + odd + j, i)) + found = false; + if (found) + diff = i; + } + + size = diff * GET_MODE_UNIT_BITSIZE (vmode); + if (size == 64) + { + if (!ARC64_HAS_FPUD) + return false; + unspec = odd ? ARC64_UNSPEC_DPACKM : ARC64_UNSPEC_DPACKL; + } + else if (size == 32) + { + unspec = odd ? ARC64_UNSPEC_SPACKM : ARC64_UNSPEC_SPACKL; + } + else if (size == 16) + { + unspec = odd ? ARC64_UNSPEC_HPACKM : ARC64_UNSPEC_HPACKL; + } + else + return false; + + /* Success! */ + if (d->testing_p) + return true; + + rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec); + emit_set_insn (d->target, src); + return true; +} + +/* Recognize VF
{H,S,D}BFLYL and VF{H,S,D}
BFLYM instructions. + + VFHBFLYL (v2hf): Ch0 Bh0 + VFHBFLYL (v4hf): Ch2 Bh2 Ch0 Bh0 + VFHBFLYL (v8hf): Ch6 Bh6 Ch4 Bh4 Ch2 Bh2 Ch0 Bh0 + + VFSBFLYL (v4hf): Ch1Ch0 Bh1Bh0 + VFSBFLYL (v8hf): Ch5Ch4 Bh5Bh4 Ch1Ch0 Bh1Bh0 + + VFDBFLYL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0 + + VFSBFLYL (v2sf): Cs0 Bs0 + VFSBFLYL (v4sf): Cs2 Bs2 Cs0 Bs0 + + VFDBFLYL (v4sf): Cs1Cs0 Bs1Bs0 + + VFDBFLYL (v2df): Cd0 Bd0 + + + VFHBFLYM (v2hf): Ch1 Bh1 + VFHBFLYM (v4hf): Ch3 Bh3 Ch1 Bh1 + VFHBFLYM (v8hf): Ch7 Bh7 Ch5 Bh5 Ch3 Bh3 Ch1 Bh1 + + VFSBFLYM (v4hf): Ch3Ch2 Bh3Bh2 + VFSBFLYM (v8hf): Ch7Ch6 Bh7Bh6 Ch3Ch2 Bh3Bh2 + + VFDBFLYM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4 + + VFSBFLYM (v2sf): Cs1 Bs1 + VFSBFLYM (v4sf): Cs3 Bs3 Cs1 Bs1 + + VFDBFLYM (v4sf): Cs3Cs2 Bs3Bs2 + + VFDBFLYM (v2df): Cd1 Bd1 + */ + +static bool +arc64_simd_bfly (struct e_vec_perm_d *d) +{ + HOST_WIDE_INT odd; + poly_uint64 nelt = d->perm.length (); + unsigned int i, j, size, unspec, diff = 0; + machine_mode vmode = d->vmode; + + if (!ARC64_HAS_FP_BASE + || !FLOAT_MODE_P (vmode) + || !d->perm[0].is_constant (&odd) + || (odd == 3) + || (odd < 0 && odd > (HOST_WIDE_INT)(nelt >> 1))) + return false; + + for (i = 4; (i > 0) && (diff == 0); i >>= 1) + { + bool found = true; + for (j = 0; (j < i) && found; j++) + if (!d->perm.series_p (j, 2 * i, odd + j, 2 * i) + || !d->perm.series_p (i + j, 2 * i, nelt + odd + j, 2 * i)) + found = false; + if (found) + diff = i; + } + + size = diff * GET_MODE_UNIT_BITSIZE (vmode); + if (size == 64) + { + if (!ARC64_HAS_FPUD) + return false; + unspec = odd ? ARC64_UNSPEC_DBFLYM : ARC64_UNSPEC_DBFLYL; + } + else if (size == 32) + { + unspec = odd ? ARC64_UNSPEC_SBFLYM : ARC64_UNSPEC_SBFLYL; + } + else if (size == 16) + { + unspec = odd ? ARC64_UNSPEC_HBFLYM : ARC64_UNSPEC_HBFLYL; + } + else + return false; + + /* Success! */ + if (d->testing_p) + return true; + + rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec); + emit_set_insn (d->target, src); + return true; +} + +/* Implement combination of vpack4hl/vpack4hm instructions. */ + +static bool +arc64_simd_lane_pack (struct e_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + HOST_WIDE_INT elem; + poly_uint64 nelt = d->perm.length (); + rtx t0, t1; + rtx in0 = d->op0; + rtx in1 = d->op1; + rtx out = d->target; + + if (vmode != E_V4HImode + || !TARGET_64BIT + || !d->perm[0].is_constant (&elem) + || (elem != 0 && elem != 2) + || !d->perm.series_p (0, 2, elem, 1) + || !d->perm.series_p (1, 2, elem + nelt, 1)) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + t0 = gen_reg_rtx (vmode); + t1 = gen_reg_rtx (vmode); + emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in1)); + emit_insn (gen_arc64_sel_lane3_1v4hi (t1, in0, in1)); + if (elem == 0) + emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t1)); + else + emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t1)); + return true; +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ + +static bool +arc64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) +{ + struct e_vec_perm_d d; + + /* Check whether the mask can be applied to a single vector. */ + if (sel.ninputs () == 1 + || (op0 && rtx_equal_p (op0, op1))) + d.one_vector_p = true; + else if (sel.all_from_input_p (0)) + { + d.one_vector_p = true; + op1 = op0; + } + else if (sel.all_from_input_p (1)) + { + d.one_vector_p = true; + op0 = op1; + } + else + d.one_vector_p = false; + + d.perm.new_vector (sel.encoding (), d.one_vector_p ? 
1 : 2, + sel.nelts_per_input ()); + d.vmode = vmode; + d.target = target; + d.op0 = op0 ? force_reg (vmode, op0) : NULL_RTX; + if (op0 == op1) + d.op1 = op1; + else + d.op1 = op1 ? force_reg (vmode, op1) : NULL_RTX; + d.testing_p = !target; + + /* The pattern matching functions above are written to look for a small + number to begin the sequence (0, 1, N/2). If we begin with an index + from the second operand, we can swap the operands. */ + poly_int64 nelt = d.perm.length (); + if (known_ge (d.perm[0], nelt)) + { + d.perm.rotate_inputs (1); + std::swap (d.op0, d.op1); + } + if (known_gt (nelt, 1)) + { + if (arc64_simd_dup (&d)) + return true; + else if (arc64_simd_vpack (&d)) + return true; + else if (arc64_simd_swapl (&d)) + return true; + else if (arc64_simd_swap (&d)) + return true; + else if (arc64_simd_vpack2wl (&d)) + return true; + else if (arc64_simd_vpack2wm (&d)) + return true; + else if (arc64_simd_exch (&d)) + return true; + else if (arc64_simd_unpk (&d)) + return true; + else if (arc64_simd_pack (&d)) + return true; + else if (arc64_simd_bfly (&d)) + return true; + else if (arc64_simd_lane_pack (&d)) + return true; + } + return false; +} + +/* Provide the costs of an addressing mode that contains ADDR. + LOAD_P is true when address is used to load a value. */ + +static int +arc64_address_cost (rtx addr, machine_mode mode, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed) +{ + const int cost_limm = speed ? 0 : COSTS_N_INSNS (1); + + if (CONSTANT_P (addr)) + return cost_limm; + + /* The cheapest construct are the addresses which fit a store + instruction (or a fp load/store instruction). */ + if (arc64_legitimate_address_1_p (mode, addr, true, false, true)) + switch (GET_CODE (addr)) + { + case PRE_DEC: + case PRE_INC: + case POST_DEC: + case POST_INC: + case PRE_MODIFY: + case POST_MODIFY: + return 0; + + default: + return 1; + } + + /* Anything else has a limm. */ + return cost_limm + 2; +} + +/* Compute the rtx cost. */ + +static bool +arc64_rtx_costs (rtx x, machine_mode mode, rtx_code outer, + int opno ATTRIBUTE_UNUSED, int *cost, bool speed) +{ + rtx op0, op1; + const int cost_limm = speed ? 0 : COSTS_N_INSNS (1); + int factor; + + /* If we use a mode larger than UNITS_PER_WORD factor it in. N.B. The cost is + already factored in, however, the costs for MULT and DIV is too large. */ + factor = CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); + + switch (GET_CODE (x)) + { + case SET: + op0 = SET_DEST (x); + op1 = SET_SRC (x); + + switch (GET_CODE (op0)) + { + case MEM: + /* Store instruction. */ + + if ((factor == 2) && DOUBLE_LOAD_STORE) + *cost = COSTS_N_INSNS (1); + *cost += arc64_address_cost (XEXP (op0, 0), mode, 0, speed); + if (CONST_INT_P (op1)) + { + *cost += speed ? 0 : + satisfies_constraint_S06S0 (op1) ? 0 : cost_limm; + return true; + } + + *cost += rtx_cost (op1, mode, SET, 1, speed); + return true; + + case SUBREG: + if (!REG_P (SUBREG_REG (op0))) + *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed); + + /* Fall through. */ + case REG: + /* Cost is just the cost of the RHS of the set. */ + *cost += rtx_cost (op1, mode, SET, 1, speed); + return true; + + default: + break; + } + return false; + + case MEM: + /* Generic/loads. */ + + if ((factor == 2) && DOUBLE_LOAD_STORE) + *cost = COSTS_N_INSNS (1); + *cost += arc64_address_cost (XEXP (x, 0), mode, 0, speed); + return true; + + case MINUS: + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if ((mode != SImode) && (mode != DImode)) + *cost += 1; + + /* Check if we have add{1,2,3} instruction. 
*/ + if ((GET_CODE (op0) == ASHIFT + && _1_2_3_operand (XEXP (op0, 1), VOIDmode)) + || (GET_CODE (op0) == MULT + && _2_4_8_operand (XEXP (op0, 1), VOIDmode))) + { + /* Check if 2nd instruction operand is constant int. This + always goes as limm. */ + if (CONST_INT_P (op1)) + *cost += cost_limm ; + } + return true; + break; + + case COMPARE: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Vitually, any instruction can do compare with zero. */ + if (op1 == const0_rtx) + *cost = 0; + return true; + + case ZERO_EXTEND: + op0 = XEXP (x, 0); + + /* Zero extending from an SI operation is cheap. */ + if (MEM_P (op0)) + { + /* All loads can zero extend to any size for free. */ + *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed); + return true; + } + if (mode == DImode + && GET_MODE (op0) == SImode + && outer == SET) + { + int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed); + if (op_cost) + *cost = op_cost; + return true; + } + break; + + case SIGN_EXTEND: + op0 = XEXP (x, 0); + if (MEM_P (op0)) + { + /* All loads can sign extend to any size for free. */ + *cost = rtx_cost (op0, VOIDmode, SIGN_EXTEND, 0, speed); + return true; + } + *cost += COSTS_N_INSNS (2); + break; + + case CONST_INT: + { + HOST_WIDE_INT imm = INTVAL (x); + + /* In general any 32bit constant can be loaded immediately, + however, when we compile for speed, we try to avoid + them. */ + *cost = 0; + if (UNSIGNED_INT6 (imm)) + return true; + else + switch (outer) + { + case SET: + if (SIGNED_INT12 (imm)) + return true; + break; + + default: + break; + } + } + /* FALLTHRU */ + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *cost = cost_limm; + return true; + + case LSHIFTRT: + op0 = XEXP (x, 0); + if (REG_P (op0)) + return true; + break; + + case ASHIFT: + case ASHIFTRT: + return true; + + case MULT: + op0 = XEXP (x, 0); + /* Multiplication has a large latency, use adds and shifts. */ + *cost = COSTS_N_INSNS (2); + /* 64x64 multiplication is expensive. */ + if (GET_MODE_SIZE (mode) != UNITS_PER_WORD + && (GET_CODE (op0) != ZERO_EXTEND + || GET_CODE (op0) != SIGN_EXTEND)) + *cost = COSTS_N_INSNS (3); + else if (GET_MODE_SIZE (mode) == UNITS_PER_WORD * 2) + *cost = factor * COSTS_N_INSNS (4); + + return true; + + case MOD: + case UMOD: + case DIV: + case UDIV: + /* Fav synthetic divs. */ + *cost = factor * COSTS_N_INSNS (12); + return true; + + case EQ: + case NE: + if (outer == IF_THEN_ELSE + && (GET_CODE (XEXP (x, 0)) == AND + || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT) + && XEXP (x, 1) == const0_rtx) + { + *cost = 0; + return true; + } + break; + + case AND: + case XOR: + case IOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if ((REG_P (op0) || REG_P (op1)) + && (CONST_INT_P (op0) || CONST_INT_P (op1))) + return true; + + /* Detect VPACK2HL instructions. */ + if (TARGET_SIMD + && GET_CODE (op0) == AND + && GET_CODE (op1) == ASHIFT + && mode == E_SImode) + return true; + + break; + + default: + break; + } + return false; +} + +/* Wrapper around arc64_rtx_costs, dumps the partial, or total cost + calculated for X. This cost is stored in *COST. Returns true + if the total cost of X was calculated. */ +static bool +arc64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer, + int param, int *cost, bool speed) +{ + bool result = arc64_rtx_costs (x, mode, (rtx_code) outer, param, cost, speed); + + if (dump_file) + { + print_rtl_single (dump_file, x); + fprintf (dump_file, "\nARC: %s cost: %d (%s)\n", + speed ? "Speed" : "Size", + *cost, result ? 
"final" : "partial"); + } + + return result; +} + +/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports + instruction fusion of some sort. */ + +static bool +arc64_macro_fusion_p (void) +{ + /* When we use accumulators, make sure we schedule the producer/consumer of + accumulator close to each others. */ + return TARGET_SIMD; +} + +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR + should be kept together during scheduling. */ + +static bool +arc64_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + /* prev and curr are simple SET insns i.e. no flag setting or branching. */ + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + + if (!arc64_macro_fusion_p ()) + return false; + + /* Don't handle anything with a jump. FIXME! maybe it is interesting to keep + the cmp and jcc together for latter folding into BRcc insn. */ + if (!simple_sets_p) + return false; + + /* 1st We are trying to match any MPY instruction which can have implicit + accumulator write and any mac instruction. */ + if (get_attr_type (prev) == TYPE_MPY + && get_attr_type (curr) == TYPE_MAC) + return true; + + /* 2nd We try to match any back to back mac instruction. */ + if (get_attr_type (prev) == TYPE_MAC + && (get_attr_type (curr) == TYPE_MAC)) + return true; + if (get_attr_type (prev) == TYPE_VMAC2H + && (get_attr_type (curr) == TYPE_VMAC2H)) + return true; + + /* 3rd Keep close to each other the MAC and the following MOV(L) rx,r58. This + pattern will be match in machine reorg and simplified to a simple MAC + instruction. */ + if (get_attr_type (curr) == TYPE_MOVE + && REG_P (SET_SRC (curr_set)) + && REGNO (SET_SRC (curr_set)) == R58_REGNUM + && get_attr_type (prev) == TYPE_MAC) + return true; + +#if 0 + /* Try to keep r58 setting close to any previous related instruction. We may + be able to merge those two into one instruction. */ + rtx set_dest; + set_dest = SET_DEST (curr_set); + if (get_attr_type (curr) == TYPE_MOVE + && REG_P (set_dest) + && REGNO (set_dest) == R58_REGNUM + && REG_P (SET_DEST (prev_set)) + && REG_P (SET_SRC (curr_set)) + && REGNO (SET_DEST (prev_set)) == REGNO (SET_SRC (curr_set))) + return true; + + /* Try to keep any mac and any previous instruction close, dependency on add + operand. */ + if (get_attr_type (curr) == TYPE_MAC + && REG_P (SET_DEST (prev_set)) + && GET_CODE (SET_SRC (curr_set)) == PLUS + && REG_P (XEXP (SET_SRC (curr_set), 1)) + && REGNO (SET_DEST (prev_set)) != R58_REGNUM + && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1))) + return true; +#endif + return false; +} + +static void +arc64_override_options (void) +{ + if (arcv3_cpu_string) + { + const char *p = arcv3_cpu_string; + if (strncmp (p, "hs5", 3) == 0) + TARGET_64BIT = false; + else if (strncmp (p, "hs6", 3) == 0) + TARGET_64BIT = true; + else + error ("%<-mcpu=%s%>s is not a valid CPU option.", arcv3_cpu_string); + p += 3; + if ( *p == '8') + { + if (TARGET_64BIT) + { + target_flags |= MASK_WIDE_LDST; + } + else + { + target_flags |= MASK_LL64; + } + target_flags |= MASK_SIMD; + } + } + + if (TARGET_LL64 && TARGET_64BIT) + { + target_flags &= ~MASK_LL64; + warning (0, "Option -mll64 is ignored because the target" + " is not 32-bit."); + } +} + +/* Return the fixed registers used for condition codes. 
*/ + +static bool +arc64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +{ + *p1 = CC_REGNUM; + *p2 = INVALID_REGNUM; + return true; +} + +/* Return true if FUNC is a naked function. */ +static bool +arc64_naked_function_p (tree func) +{ + tree func_decl = func; + if (func == NULL_TREE) + func_decl = current_function_decl; + return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl)); +} + +/* Implement 'TARGET_SET_CURRENT_FUNCTION'. */ + +static void +arc64_set_current_function (tree decl) +{ + unsigned int fn_type = ARC64_FUNCTION_UNKNOWN; + tree func_decl = decl; + + if (decl == NULL_TREE + || current_function_decl == NULL_TREE + || current_function_decl == error_mark_node + || ! cfun->machine + || cfun->machine->fn_type != ARC64_FUNCTION_UNKNOWN) + return; + + /* Check if it is a naked function. */ + if (arc64_naked_function_p (decl)) + fn_type |= ARC64_FUNCTION_NAKED; + + if (func_decl == NULL_TREE) + func_decl = current_function_decl; + + /* Now see if this is an interrupt handler. */ + if (lookup_attribute ("interrupt", + TYPE_ATTRIBUTES (TREE_TYPE (func_decl))) != NULL_TREE) + fn_type |= ARC64_FUNCTION_ILINK; + + if (!ARC_NAKED_P (fn_type) && !ARC_INTERRUPT_P (fn_type)) + fn_type |= ARC64_FUNCTION_NORMAL; + + cfun->machine->fn_type = fn_type; + + if (ARC_NAKED_P (fn_type) && ARC_INTERRUPT_P (fn_type)) + error ("function attributes %qs and %qs are mutually exclusive", + "interrupt", "naked"); +} + +/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */ +static bool +arc64_allocate_stack_slots_for_args () +{ + /* Naked functions should not allocate stack slots for arguments. */ + return !arc64_naked_function_p (current_function_decl); +} + +/* Implement TARGET_WARN_FUNC_RETURN. */ +static bool +arc64_warn_func_return (tree decl) +{ + /* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + return !arc64_naked_function_p (decl); +} + +/* Return false for selected jumps crossing between hot and cold partitions. */ + +static bool +arc64_can_follow_jump (const rtx_insn *br1, const rtx_insn *br2) +{ + /* Avoid compiler warnings. */ + union {const rtx_insn *c; rtx_insn *r;} u; + + u.c = br1; + if (flag_reorder_blocks_and_partition + && CROSSING_JUMP_P (br2)) + switch (get_attr_type (u.r)) + { + case TYPE_BRANCHCC: + case TYPE_BRCC: + return false; + case TYPE_BRANCH: + if (get_attr_length (u.r) == 2) + return false; + break; + default: + break; + } + + return true; +} + +/* Implements target hook TARGET_SCHED_ISSUE_RATE. */ + +static int +arc64_sched_issue_rate (void) +{ + return 2; +} + +/* + Global functions. +*/ + +/* Returns TRUE if CALLEE should be treated as long-calls (i.e. called + via a register). */ + +bool +arc64_is_long_call_p (rtx sym) +{ + arc64_symb symb_t = arc64_get_symbol_type (sym); + + /* No subtleties for the time being, if user asks for large memory model, + everything goes via regs. */ + if (!TARGET_64BIT + && (arc64_cmodel_var == ARC64_CMODEL_LARGE)) + return true; + + switch (symb_t) + { + case ARC64_UNK: + case ARC64_LO32: + return false; + + case ARC64_PCREL: + case ARC64_PIC: + return false; + + case ARC64_LPIC: + /* fPIC + Large memory model forces everything in registers. */ + return (arc64_cmodel_var == ARC64_CMODEL_LARGE) ? true : false; + + case ARC64_LARGE: + return true; + + case ARC64_TLS: + default: + gcc_unreachable (); + } +} + +/* X and Y are two things to compare using CODE. 
Emit the compare insn and + return the rtx for the cc reg in the proper mode. */ + +rtx +arc64_gen_compare_reg (enum rtx_code code, rtx x, rtx y) +{ + machine_mode mode = SELECT_CC_MODE (code, x, y); + rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + if (CONSTANT_P (x) && CONSTANT_P (y)) + x = force_reg (word_mode, x); + + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + return cc_reg; +} + +/* Prepare operands for move in MODE. Return true iff the move has + been emitted. */ + +bool +arc64_prepare_move_operands (rtx op0, rtx op1, machine_mode mode) +{ + if (MEM_P (op0) && !REG_P (op1)) + { + if (mode == E_DImode + || !satisfies_constraint_S06S0 (op1)) + op1 = force_reg (mode, op1); + } + else if (GET_MODE_SIZE (mode) == UNITS_PER_WORD + && CONSTANT_P (op1)) + { + unsigned HOST_WIDE_INT lo; + unsigned HOST_WIDE_INT hi; + rtx tmp; + + switch (GET_CODE (op1)) + { + case CONST_INT: + gcc_assert (mode == Pmode); + if (!SIGNED_INT32 (INTVAL (op1)) && !UNSIGNED_INT32 (INTVAL (op1))) + { + HOST_WIDE_INT val; + /* We have a large 64bit immediate: + movhl rA, (val64 >> 32) + orl rA,rA, (val64 & 0xffffffff) + FIXME! add strategies to minimize the size. */ + + val = INTVAL (op1); + lo = zext_hwi (val, 32); + hi = zext_hwi (val >> 32, 32); + tmp = op0; + + if (can_create_pseudo_p ()) + tmp = gen_reg_rtx (mode); + + /* Maybe do first a move cnst to movsi to get the + constants minimized. */ + emit_insn (gen_rtx_SET (tmp, + gen_rtx_ASHIFT (mode, GEN_INT (hi), + GEN_INT (32)))); + emit_insn (gen_rtx_SET (op0, + plus_constant (mode, tmp, lo))); + return true; + } + break; + + case CONST_WIDE_INT: + gcc_unreachable (); + + case CONST_DOUBLE: + if (mode == SFmode) + return false; + else + { + long res[2]; + unsigned HOST_WIDE_INT ival; + scalar_int_mode imode = int_mode_for_mode (mode).require (); + + gcc_assert (mode == DFmode); + + real_to_target (res, CONST_DOUBLE_REAL_VALUE (op1), + REAL_MODE_FORMAT (mode)); + lo = zext_hwi (res[0], 32); + hi = zext_hwi (res[1], 32); + + ival = lo | (hi << 32); + tmp = gen_reg_rtx (imode); + emit_move_insn (tmp, gen_int_mode (ival, imode)); + emit_move_insn (op0, gen_lowpart (mode, tmp)); + return true; + } + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + op1 = arc64_legitimize_address_1 (op1, op0); + break; + + default: + break; + } + } + + /* Check and fix unsupported store addresses. */ + if (MEM_P (op0) + && !arc64_legitimate_address_1_p (mode, XEXP (op0, 0), false, + false, true)) + { + rtx tmp = gen_reg_rtx (Pmode); + rtx addr = XEXP (op0, 0); + rtx t0 = XEXP (addr, 0); + rtx t1 = XEXP (addr, 1); + + if (GET_CODE (t0) == MULT) + { + rtx ta = XEXP (t0, 0); + rtx tb = XEXP (t0, 1); + t0 = gen_rtx_ASHIFT (Pmode, ta, + GEN_INT (ARC64LOG2 (INTVAL (tb)))); + } + + emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (Pmode, t0, t1))); + op0 = replace_equiv_address (op0, tmp); + } + emit_insn (gen_rtx_SET (op0, op1)); + return true; +} + +/* Split a mov with long immediate instruction into smaller, size + friendly instructions. */ +#if 0 +bool +arc64_split_mov_const (rtx *operands) +{ + unsigned HOST_WIDE_INT ival; + HOST_WIDE_INT shimm; + machine_mode mode = GET_MODE (operands[0]); + + /* Manage a constant. */ + gcc_assert (CONST_INT_P (operands[1])); + ival = INTVAL (operands[1]) & 0xffffffff; + + if (SIGNED_INT12 (ival)) + return false; + + /* 1. Check if we can just rotate limm by 8 but using ROR8. 
*/ + if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f000000) == 0)) + { + shimm = (ival >> 24) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ROTATERT (mode, GEN_INT (shimm), + GEN_INT (8)))); + return true; + } + /* 2. Check if we can just shift by 8 to fit into the u6 of LSL8. */ + if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f00) == 0)) + { + shimm = (ival >> 8) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ASHIFT (mode, GEN_INT (shimm), + GEN_INT (8)))); + return true; + } + + /* 3. Check if we can just shift by 16 to fit into the u6 of LSL16. */ + if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f0000) == 0)) + { + shimm = (ival >> 16) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ASHIFT (mode, GEN_INT (shimm), + GEN_INT (16)))); + return true; + } + + /* 4. Check if we can do something like mov_s h,u8 / asl_s ra,h,#nb. */ + if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0 + && TARGET_BARREL_SHIFTER) + { + HOST_WIDE_INT shift = __builtin_ffs (ival); + shimm = (ival >> (shift - 1)) & 0xff; + emit_insn (gen_rtx_SET (operands[0], GEN_INT (shimm))); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ASHIFT (mode, operands[0], + GEN_INT (shift - 1)))); + return true; + } + + /* 5. Check if we can just rotate the limm, useful when no barrel + shifter is present. */ + if ((ival & ~0x8000001f) == 0) + { + shimm = (ival * 2 + 1) & 0x3f; + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_ROTATERT (mode, GEN_INT (shimm), + const1_rtx))); + return true; + } + + /* 6. Check if we can do something with bmask. */ + if (IS_POWEROF2_P (ival + 1)) + { + emit_insn (gen_rtx_SET (operands[0], constm1_rtx)); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_AND (mode, operands[0], + GEN_INT (ival)))); + return true; + } + + return false; +} + +/* Helper to check Cax constraint. */ + +bool +arc64_check_mov_const (HOST_WIDE_INT ival) +{ + ival = ival & 0xffffffff; + + if ((ival & ~0x8000001f) == 0) + return true; + + if (IS_POWEROF2_P (ival + 1)) + return true; + + /* The next rules requires a barrel shifter. */ + if (!TARGET_BARREL_SHIFTER) + return false; + + if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0) + return true; + + if ((ival & ~0x3f00) == 0) + return true; + + if ((ival & ~0x3f0000) == 0) + return true; + + if ((ival & ~0x3f000000) == 0) + return true; + + return false; +} +#endif + +/* This function is used by the call expanders of the machine description. + RESULT is the register in which the result is returned. It's NULL for + "call" and "sibcall". + MEM is the location of the function call. + SIBCALL indicates whether this function call is normal call or sibling call. + It will generate different pattern accordingly. */ + +void +arc64_expand_call (rtx result, rtx mem, bool sibcall) +{ + rtx call, callee, tmp; + rtvec vec; + machine_mode mode; + + gcc_assert (MEM_P (mem)); + callee = XEXP (mem, 0); + mode = GET_MODE (callee); + gcc_assert (mode == Pmode || CONST_INT_P (callee)); + + /* Decide if we should generate indirect calls by loading the + address of the callee into a register before performing the + branch-and-link. 
*/ + if (arc64_is_long_call_p (callee) && !REG_P (callee)) + XEXP (mem, 0) = force_reg (mode, callee); + + call = gen_rtx_CALL (VOIDmode, mem, const0_rtx); + + if (result != NULL_RTX) + call = gen_rtx_SET (result, call); + + if (sibcall) + tmp = ret_rtx; + else + tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, BLINK_REGNUM)); + + vec = gen_rtvec (2, call, tmp); + call = gen_rtx_PARALLEL (VOIDmode, vec); + + emit_call_insn (call); +} + +/* Return nonzero if this function is known to have a null epilogue. + This allows the optimizer to omit jumps to jumps if no stack + was created. */ + +bool +arc64_can_use_return_insn_p (void) +{ + return (reload_completed && cfun->machine->frame.frame_size == 0 + && !ARC_INTERRUPT_P (cfun->machine->fn_type)); +} + + +/* Return 1 if the register is used by the epilogue. We need to say the + return register is used, but only after epilogue generation is complete. + Note that in the case of sibcalls, the values "used by the epilogue" are + considered live at the start of the called function. */ + +int +arc64_epilogue_uses (int regno) +{ +#ifdef HAVE_AS_TLS + if (regno == R30_REGNUM) + return 1; +#endif + + if (epilogue_completed) + { + if (regno == BLINK_REGNUM) + return 1; + + /* An interrupt restores more registers. */ + if (ARC_INTERRUPT_P (cfun->machine->fn_type) + && (df_regs_ever_live_p (regno) + || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))) + return 1; + } + + return 0; +} + +/* Return 1 if we use TP because it is alivel on entry to an exception + edge. */ + +int +arc64_eh_uses (int regno ATTRIBUTE_UNUSED) +{ +#ifdef HAVE_AS_TLS + if (regno == R30_REGNUM) + return 1; +#endif + return 0; +} + + +/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer + or argument pointer. TO is either the stack pointer or hard frame + pointer. */ + +HOST_WIDE_INT +arc64_initial_elimination_offset (unsigned from, unsigned to) +{ + struct arc64_frame *frame = &cfun->machine->frame; + + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return frame->saved_regs_size; + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return (frame->saved_regs_size + frame->saved_locals_size + + frame->saved_outargs_size); + + if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM)) + return (frame->saved_locals_size + frame->saved_outargs_size); + + if ((from == FRAME_POINTER_REGNUM) && (to == HARD_FRAME_POINTER_REGNUM)) + return 0; + + gcc_unreachable (); +} + +/* Helper for INIT_EXPANDERS macro called to initialize any target + specific information. */ + +void arc64_init_expanders (void) +{ + init_machine_status = arc64_init_machine_status; +} + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a + COMPARE, return the mode to be used for the comparison. */ + +machine_mode +arc64_select_cc_mode (enum rtx_code op, + rtx x, + rtx y) +{ + machine_mode mode = GET_MODE (x); + + /* Matches all instructions which can do .f and clobbers only Z flag. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && GET_CODE (x) == MULT + && (op == EQ || op == NE)) + return CC_Zmode; + + /* Matches all instructions which can do .f and clobbers Z and N + flags. Because we compare with zero, for LT we can use "mi" and + for GT we can use "pl". We cannot use GT with "pnz" because it + cannot be reversed. 
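+ (Strictly speaking, "pl" tests the N flag being clear, i.e. GE against + zero, which is why only EQ, NE, LT and GE select CC_ZNmode below.)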
*/ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && (op == EQ || op == NE || op == LT || op == GE)) + return CC_ZNmode; + + /* All floating point compares return CC_FPU if it is an equality + comparison, and CC_FPUE otherwise. N.B. LTGT and UNEQ cannot be + directly mapped to fcmp instructions. */ + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + switch (op) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + return CC_FPUmode; + + case LT: + case LE: + case GT: + case GE: + case LTGT: + return CC_FPUEmode; + + default: + gcc_unreachable (); + } + } + return CCmode; +} + +/* Implement RETURN_ADDR_RTX. We do not support moving back to a + previous frame. */ + +rtx +arc64_return_addr (int count , rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + return get_hard_reg_initial_val (Pmode, BLINK_REGNUM); +} + +/* Expand the "prologue" pattern. */ + +void +arc64_expand_prologue (void) +{ + HOST_WIDE_INT frame_allocated; + struct arc64_frame *frame = &cfun->machine->frame; + + if (flag_stack_usage_info) + current_function_static_stack_size = frame->frame_size; + + if (ARC_NAKED_P (cfun->machine->fn_type)) + return; + + frame_allocated = frame->frame_size; + + frame_allocated -= arc64_save_callee_saves (); + + /* If something left, allocate. */ + if (frame_allocated > 0) + frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated); + + /* Emit a blockage. */ + emit_insn (gen_blockage ()); +} + +/* Expand "epilogue" pattern. */ + +void +arc64_expand_epilogue (bool sibcall_p) +{ + HOST_WIDE_INT frame_deallocated; + struct arc64_frame *frame = &cfun->machine->frame; + + if (ARC_NAKED_P (cfun->machine->fn_type)) + { + emit_jump_insn (gen_return ()); + return; + } + + frame_deallocated = frame->frame_size; + frame_deallocated -= arc64_restore_callee_saves (sibcall_p); + + if (frame_deallocated != 0) + frame_stack_add (frame_deallocated); + + /* For frames that use __builtin_eh_return, the register defined by + EH_RETURN_STACKADJ_RTX is set to 0 for all standard return paths. + On eh_return paths however, the register is set to the value that + should be added to the stack pointer in order to restore the + correct stack pointer for the exception handling frame. + + For ARC64 we are going to use r4 for EH_RETURN_STACKADJ_RTX, add + this onto the stack for eh_return frames. */ + if (crtl->calls_eh_return) + emit_insn (gen_add2_insn (stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + + if (ARC_INTERRUPT_P (cfun->machine->fn_type)) + emit_jump_insn (gen_rtie ()); + else if (!sibcall_p) + emit_jump_insn (gen_simple_return ()); +} + +/* Helper used to determine if an address requires a long immediate. + To be used in computing the length of an load/store + instruction. */ + +bool +arc64_limm_addr_p (rtx op) +{ + rtx addr = XEXP (op, 0); + + if (!MEM_P (op)) + return false; + + switch (GET_CODE (addr)) + { + case SYMBOL_REF: + case LABEL_REF: + case CONST_INT: + case CONST: + case UNSPEC: + case LO_SUM: + return true; + + case PRE_INC: + case PRE_DEC: + case POST_INC: + case POST_DEC: + case PRE_MODIFY: + case POST_MODIFY: + case PLUS: + /* legitimate address doesn't recognize [b,limm] variant of st. + Hence, use it to determine if we have limm or not in + address. */ + return !arc64_legitimate_address_1_p (GET_MODE (op), addr, + false, false, true); + default: + break; + } + return false; +} + +/* Used by move_dest_operand predicate. 
*/ + +bool +arc64_legitimate_store_address_p (machine_mode mode, rtx addr) +{ + return arc64_legitimate_address_1_p (mode, addr, true, false, true); +} + +/* Return true if an address fits a short load/store instruction. */ + +bool +arc64_short_access_p (rtx op, machine_mode mode, bool load_p) +{ + rtx addr, plus0, plus1; + bool f0, f1; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return 0; + + /* FIXME! remove it when "uncached" attribute is added. */ + if (MEM_VOLATILE_P (op) && TARGET_VOLATILE_DI) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + /* Decode the address now. */ + addr = XEXP (op, 0); + switch (GET_CODE (addr)) + { + case REG: + return check_short_insn_register_p (addr, false); + + case PLUS: + plus0 = XEXP (addr, 0); + plus1 = XEXP (addr, 1); + + f0 = check_short_insn_register_p (plus0, false); + f1 = check_short_insn_constant_p (plus1, mode); + + /* Check for [Rb + shimm]. */ + if (f0 && f1) + return true; + + if (!load_p) + return false; + + /* Check for [Rb + Ri]. */ + f1 = check_short_insn_register_p (plus1, false); + + if (f0 && f1) + return true; + + default: + break; + } + return false; +} + +/* Return true if an address fits a floating point load/store + instruction. The next formats are allowed [b, s9], [b], [s32limm], + and scaled [b, s9]. */ + +bool +arc64_fp_access_p (rtx op, machine_mode mode) +{ + rtx addr; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return 0; + + /* FIXME! remove it when "uncached" attribute is added. */ + if (MEM_VOLATILE_P (op) && TARGET_VOLATILE_DI) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + /* Decode the address now. */ + addr = XEXP (op, 0); + + return arc64_legitimate_address_1_p (mode, addr, true, false, false); +} + +/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return + normally or return to a previous frame after unwinding. + + An EH return uses a single shared return sequence. The epilogue is + exactly like a normal epilogue except that it has an extra input + register (EH_RETURN_STACKADJ_RTX) which contains the stack + adjustment that must be applied after the frame has been destroyed. + An extra label is inserted before the epilogue which initializes + this register to zero, and this is the entry point for a normal + return. + + An actual EH return updates the return address, initializes the + stack adjustment and jumps directly into the epilogue (bypassing + the zeroing of the adjustment). Since the return address is + typically saved on the stack when a function makes a call, the + saved BLINK must be updated outside the epilogue. + + This poses problems as the store is generated well before the + epilogue, so the offset of BLINK is not known yet. Also + optimizations will remove the store as it appears dead, even after + the epilogue is generated (as the base or offset for loading BLINK + is different in many cases). + + To avoid these problems this implementation forces the frame + pointer in eh_return functions so that the location of BLINK is + fixed and known early. It also marks the store volatile, so no + optimization is permitted to remove the store. */ + +rtx +arc64_eh_return_handler_rtx (void) +{ + rtx tmp = gen_frame_mem (Pmode, + plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD)); + + /* Mark the store volatile, so no optimization is permitted to remove it. */ + MEM_VOLATILE_P (tmp) = true; + return tmp; +} + +/* Select a format to encode pointers in exception handling data. 
*/ + +int +arc64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) +{ + int type; + + if (!flag_pic) + return DW_EH_PE_absptr; + + switch (arc64_cmodel_var) + { + case ARC64_CMODEL_SMALL: + case ARC64_CMODEL_MEDIUM: + /* text+got+data < 4Gb. 4-byte signed relocs are sufficient + for everything. */ + type = DW_EH_PE_sdata4; + break; + default: + /* No assumptions here. 8-byte relocs required. */ + type = DW_EH_PE_sdata8; + break; + } + return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; +} + +/* Emit a (pre) memory barrier around an atomic sequence according to + MODEL. */ + +void +arc64_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +/* Emit a (post) memory barrier around an atomic sequence according to + MODEL. */ + +void +arc64_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. MODEL_RTX + is a CONST_INT containing the memory model to use. */ + +void +arc64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx orig_before, rtx orig_after, rtx model_rtx) +{ + enum memmodel model = (enum memmodel) INTVAL (model_rtx); + machine_mode mode = GET_MODE (mem); + rtx label, x, cond; + rtx before = orig_before, after = orig_after; + + /* ARC atomic ops work only with 32-bit aligned memories. */ + gcc_assert (mode == SImode || mode == DImode); + + arc64_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (VOIDmode, label); + + if (before == NULL_RTX) + before = gen_reg_rtx (mode); + + if (after == NULL_RTX) + after = gen_reg_rtx (mode); + + /* Load exclusive. */ + if(mode == SImode) + emit_insn (gen_arc_load_exclusivesi (before, mem)); + else /* DImode */ + emit_insn (gen_arc_load_exclusivedi (before, mem)); + + switch (code) + { + case NOT: + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (after, x)); + x = gen_rtx_NOT (mode, after); + emit_insn (gen_rtx_SET (after, x)); + break; + + case MINUS: + if (CONST_INT_P (val)) + { + val = GEN_INT (-INTVAL (val)); + code = PLUS; + } + + /* FALLTHRU. */ + default: + x = gen_rtx_fmt_ee (code, mode, before, val); + emit_insn (gen_rtx_SET (after, x)); + break; + } + + /* Exclusively store new item. Store clobbers CC reg. */ + if(mode == SImode) + emit_insn (gen_arc_store_exclusivesi (mem, after)); + else /* DImode */ + emit_insn (gen_arc_store_exclusivedi (mem, after)); + + /* Check the result of the store. */ + cond = gen_rtx_REG (CC_Zmode, CC_REGNUM); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + label, pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + arc64_post_atomic_barrier (model); +} + +/* Helper function used by "atomic_compare_and_swap" expand + pattern. 
*/ + +void +arc64_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + machine_mode mode; + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + if (reg_overlap_mentioned_p (rval, oldval)) + oldval = copy_to_reg (oldval); + + if (mode == SImode || mode == DImode) + { + if (mode == SImode) + emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval, + is_weak, mod_s, mod_f)); + else /* DImode */ + emit_insn (gen_atomic_compare_and_swapdi_1 (rval, mem, oldval, newval, + is_weak, mod_s, mod_f)); + + x = gen_rtx_REG (CC_Zmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (bval, x)); + } + else + { + arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval, + is_weak, mod_s, mod_f); + } +} + +/* Helper function used by the "atomic_compare_and_swapsdi_1" + pattern. */ + +void +arc64_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval; + machine_mode mode, mode_cc; + enum memmodel mod_s, mod_f; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[5]); + mod_f = (enum memmodel) INTVAL (operands[6]); + mode = GET_MODE (mem); + + /* ARC atomic ops work only with 32-bit or 64-bit aligned memories. */ + gcc_assert (mode == SImode || mode == DImode); + + arc64_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + /* Load exclusive. */ + if(mode == SImode) + emit_insn (gen_arc_load_exclusivesi (rval, mem)); + else /* DImode */ + emit_insn (gen_arc_load_exclusivedi (rval, mem)); + + /* Check if it is oldval. */ + mode_cc = SELECT_CC_MODE (NE, rval, oldval); + cond = gen_rtx_REG (mode_cc, CC_REGNUM); + emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode_cc, rval, oldval))); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + /* Exclusively store new item. Store clobbers CC reg. */ + if(mode == SImode) + emit_insn (gen_arc_store_exclusivesi (mem, newval)); + else /* DImode */ + emit_insn (gen_arc_store_exclusivedi (mem, newval)); + + if (!is_weak) + { + /* Check the result of the store. */ + cond = gen_rtx_REG (CC_Zmode, CC_REGNUM); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (label2); + + arc64_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (label2); +} + +/* Expander for casesi. The vector table is always PC-relative, and + it is made up of branch instructions. When we have CODE_DENSITY + option enabled, we use BI instruction, otherwise, depending on the + memory model, an emulation of it. We use the same emulation + contruction, for PIC or LARGE memory model. For a non-pic + SMALL/MEDIUM memory model, we make use of a single add2 instruction + which has one input the address of the start dispatch table, and + the other input indicates where we jump in the table. 
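+ (Concretely: the non-BI, non-PIC small-model path below uses + casesi_addaddr followed by casesi_dispatch, while the PIC, medium and + large model paths first materialise the table address with an + ARC64_UNSPEC_PCREL move and then go through casesi_addaddrdi and + casesi_dispatchdi.)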
*/ + +void arc64_expand_casesi (rtx operands[]) +{ + rtx reg; + + if (operands[1] != const0_rtx) + { + reg = gen_reg_rtx (SImode); + operands[1] = GEN_INT (trunc_int_for_mode (-INTVAL (operands[1]), + SImode)); + emit_insn (gen_addsi3 (reg, operands[0], operands[1])); + operands[0] = reg; + } + emit_unlikely_jump (gen_cbranchsi4 (gen_rtx_GTU (SImode, operands[0], + operands[2]), + operands[0], operands[2], operands[4])); + + if (!TARGET_CODE_DENSITY) + { + switch (arc64_cmodel_var) + { + case ARC64_CMODEL_SMALL: + if (!flag_pic) + { + reg = gen_reg_rtx (SImode); + emit_insn (gen_casesi_addaddr (reg, operands[0], operands[3])); + operands[0] = reg; + break; + } + /* Fall through */ + case ARC64_CMODEL_MEDIUM: + case ARC64_CMODEL_LARGE: + { + gcc_assert (word_mode == DImode); + /* Same code is used for PIC and large memory model. */ + rtx lbl = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + rtx tmp = gen_reg_rtx (DImode); + reg = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (reg, + gen_rtx_UNSPEC (DImode, + gen_rtvec (1, lbl), + ARC64_UNSPEC_PCREL))); + emit_insn (gen_casesi_addaddrdi (tmp, operands[0], reg)); + emit_jump_insn (gen_casesi_dispatchdi (tmp, operands[3])); + return; + } + default: + gcc_unreachable (); + } + } + + emit_jump_insn (gen_casesi_dispatch (operands[0], operands[3])); +} + +bool +arc64_allow_direct_access_p (rtx op) +{ + return (arc64_get_symbol_type (op) == ARC64_LO32); +} + +/* Decide if mov simd instruction needs to be split. Return TRUE if + so. This procedure is required when the vector length is larger + than 64 bit. */ +bool +arc64_split_double_move_p (rtx *operands, machine_mode mode) +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + /* Split only double moves. */ + if (GET_MODE_SIZE (mode) < (UNITS_PER_WORD * 2)) + return false; + + if (register_operand (op0, mode) && register_operand (op1, mode)) + { + /* Check if we can use vadd2 instruction as a mov. */ + if (TARGET_SIMD + && !FLOAT_MODE_P (mode) + && !TARGET_64BIT + && (GET_MODE_SIZE (mode) == (2 * UNITS_PER_WORD))) + { + /* If both registers are even-numbered, fallback to vadd2. */ + if (((REGNO (op0) & 0x01) == 0) && ((REGNO (op1) & 0x01) == 0)) + return false; + else + return true; + } + + /* Check for r-reg to f-reg moves. */ + if (GP_REGNUM_P (REGNO (op0)) || GP_REGNUM_P (REGNO (op1))) + return true; + + /* Sanity check for vfmov instruction. */ + gcc_assert (arc64_fsimd_register (op0, mode) + && arc64_fsimd_register (op1, mode)); + return false; + } + + /* Check if we have 64/128bit moves. */ + if (DOUBLE_LOAD_STORE + && ((memory_operand (op0, mode) && REG_P (op1)) + || (memory_operand (op1, mode) && REG_P (op0)))) + { + gcc_assert (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2)); + /* Sanity check for wide st/ld instructions. */ + if (REG_P (op0) && ((REGNO (op0) & 0x01) != 0)) + return true; + if (REG_P (op1) && ((REGNO (op1) & 0x01) != 0)) + return true; + return false; + } + + /* Evereything else is going for a split. */ + return true; +} + +/* This is the actual routine which splits a move simd to smaller + bits. */ +void +arc64_split_double_move (rtx *operands, machine_mode mode) +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx lo, hi, mem_lo, mem_hi, src, dst; + unsigned int rdst, rsrc, i; + unsigned iregs = CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); + bool swap_p = false; + machine_mode mvmode = smallest_int_mode_for_size (BITS_PER_WORD); + + /* Maximum size handled is twice UNITS_PER_WORD. 
*/ + gcc_assert (iregs <= 2); + + /* This procedure works as long as the width of the fp regs is the + same as the width of r regs. */ + if (FLOAT_MODE_P (mode)) + { + gcc_assert (UNITS_PER_WORD == UNITS_PER_FP_REG); + mvmode = float_mode_for_size (BITS_PER_WORD).require (); + } + + /* Split reg-reg move. */ + if (REG_P (op0) && REG_P (op1)) + { + rdst = REGNO (op0); + rsrc = REGNO (op1); + + if (!reg_overlap_mentioned_p (op0, op1) + || rdst < rsrc) + /* The fp regs will never overlap r-regs. However, this + procedure can be used also for r-reg to r-regs splits. */ + for (i = 0; i < iregs; i++) + emit_move_insn (gen_rtx_REG (mvmode, rdst + i), + gen_rtx_REG (mvmode, rsrc + i)); + else + for (i = 0; i < iregs; i++) + emit_move_insn (gen_rtx_REG (mvmode, rdst + iregs - i - 1), + gen_rtx_REG (mvmode, rsrc + iregs - i - 1)); + return; + } + + /* Split mem-reg moves. */ + gcc_assert (REG_P (op0) || REG_P (op1)); + + if (REG_P (op1)) + { + src = op1; + dst = op0; + } + else + { + src = op0; + dst = op1; + } + + lo = gen_lowpart (mvmode, src); + hi = gen_highpart_mode (mvmode, mode, src); + + if (auto_inc_p (XEXP (dst, 0))) + { + rtx offset, reg, next, addr = XEXP (dst, 0); + enum rtx_code code = GET_CODE (addr); + + switch (code) + { + case PRE_INC: + offset = GEN_INT (GET_MODE_SIZE (mode)); + code = PRE_MODIFY; + break; + case PRE_DEC: + offset = GEN_INT (-GET_MODE_SIZE (mode)); + code = PRE_MODIFY; + break; + case POST_MODIFY: + case PRE_MODIFY: + offset = XEXP (XEXP (addr, 1), 1); + break; + case POST_INC: + offset = GEN_INT (GET_MODE_SIZE (mode)); + code = POST_MODIFY; + break; + case POST_DEC: + offset = GEN_INT (-GET_MODE_SIZE (mode)); + code = POST_MODIFY; + break; + default: + gcc_unreachable (); + } + + reg = XEXP (addr, 0); + next = gen_rtx_fmt_ee (code, Pmode, reg, + gen_rtx_PLUS (Pmode, reg, offset)); + + switch (code) + { + case POST_MODIFY: + /* We need to swap lo/hi order such that we emit first the + hi-load with an offset, and last the post modify + instruction. Thus the code can handle any type of auto + increment address. */ + mem_lo = adjust_automodify_address (dst, mvmode, next, 0); + next = plus_constant (Pmode, reg, GET_MODE_SIZE (mvmode)); + mem_hi = adjust_automodify_address (dst, mvmode, next, + GET_MODE_SIZE (mvmode)); + swap_p = true; + break; + case PRE_MODIFY: + mem_lo = adjust_automodify_address (dst, mvmode, next, 0); + next = plus_constant (Pmode, reg, GET_MODE_SIZE (mvmode)); + mem_hi = adjust_automodify_address (dst, mvmode, next, + GET_MODE_SIZE (mvmode)); + break; + default: + gcc_unreachable (); + } + } + else if (GET_CODE (XEXP (dst, 0)) == UNSPEC) + { + /* For rare situations when we need to split a PIC address. */ + rtx addr = XEXP (dst, 0); + switch (XINT (addr, 1)) + { + case ARC64_UNSPEC_PCREL: + addr = XVECEXP (addr, 0, 0); + addr = plus_constant (Pmode, addr, GET_MODE_SIZE (mvmode)); + addr = gen_sym_unspec (addr, ARC64_UNSPEC_PCREL); + break; + + default: + /* Fail for anything else. 
*/ + gcc_unreachable (); + } + + mem_lo = adjust_address (dst, mvmode, 0); + mem_hi = adjust_automodify_address (mem_lo, GET_MODE (mem_lo), + addr, GET_MODE_SIZE (mvmode)); + } + else + { + mem_lo = adjust_address (dst, mvmode, 0); + mem_hi = arc64_move_pointer (mem_lo, GET_MODE_SIZE (mvmode)); + /* Catching scenarios like: + ld r0, [r0, 4] (ld lo, [mem_lo]) + ld r1, [r0, 8] (ld hi, [mem_hi]) + + And setting the trigger (swap_p) to convert them to: + ld r1, [r0, 8] + ld r0, [r0, 4] */ + if (reg_overlap_mentioned_p (lo, mem_lo)) + swap_p = true; + } + + if (REG_P (op1)) + { + if (!swap_p) + emit_move_insn (mem_lo, lo); + emit_move_insn (mem_hi, hi); + if (swap_p) + emit_move_insn (mem_lo, lo); + } + else + { + if (!swap_p) + emit_move_insn (lo, mem_lo); + emit_move_insn (hi, mem_hi); + if (swap_p) + emit_move_insn (lo, mem_lo); + } +} + +/* What mode to use when copying N-bits of data. + + HS5x + n >= 64: copy_mode() + n >= 32: SFmode if FP_MOVE + SImode otherwise + n >= 16: HFmode if FP_MOVE + HImode otherwise + n >= 8: QImode + + HS6x + n >= 128: copy_mode() + n >= 64: DFmode if FP_MOVE +i DImode otherwise + n >= 32: SFmode if FP_MOVE +i SImode otherwise + n >= 16: HFmode if FP_MOVE +i HImode otherwise + n >= 8: QImode + + Note about the "return ((machine_mode) (FP ? Fmode : Imode))": + GCC 8.3 gives a warning about "int to machine_mode" conversion if we + don't use the explicit "((machine_mode) ...)" casting, while it is + absolutely OK with "retun [F|I]mode;" separately. +*/ + +static machine_mode +cpymem_copy_mode_for_n (int n) +{ + /* HS6x. */ + if (TARGET_64BIT) + { + if (n >= 128) + return cpymem_copy_mode (); + else if (n >= 64) + return ((machine_mode) (TARGET_FP_MOVE ? DFmode : DImode)); + /* fall-thru. */ + } + /* HS5x. */ + else + { + if (n >= 64) + return cpymem_copy_mode (); + /* fall-thru. */ + } + + if (n >= 32) + return ((machine_mode) (TARGET_FP_MOVE ? SFmode : SImode)); + else if (n >= 16) + return ((machine_mode) (TARGET_FP_MOVE ? HFmode : HImode)); + else + return QImode; +} + +/* Returns the bit size (of a mode) that is big enough to + handle the remaining N-bits of data. + + This function is not expected to be called for Ns that + are too big for the architecture to swallow. e.g. for + an HS5x target without 64-bit load/store support, any + N > 32 is not expected. */ + +static int +cpymem_smallest_bigger_mode_bitsize (int n) +{ + if (n <= 8) + return 8; /* QImode. */ + else if (n <= 16) + return 16; /* H{I|F}mode. */ + else if (n <= 32) + return 32; /* S{I|F}mode. */ + else if (n <= 64) + { + /* a 64-bit arch or a 32-bit arch with double load/stores. */ + if (TARGET_64BIT || TARGET_LL64) + return 64; /* {DI|DF|V2SF}mode. */ + + /* This functions mustn't have been called. */ + gcc_unreachable (); + } + else if (n <= 128) + { + if (TARGET_64BIT && TARGET_WIDE_LDST) + return 128; /* {TI|V2DF}mode. */ + /* Fall-thru. */ + } + + gcc_unreachable (); +} + +/* Expand cpymem, as if from a __builtin_memcpy. Return true if + we succeed, otherwise return false. */ + +bool +arc64_expand_cpymem (rtx *operands) +{ + int n, mode_bits; + rtx dst = operands[0]; + rtx src = operands[1]; + rtx base; + machine_mode cur_mode; + bool speed_p = !optimize_function_for_size_p (cfun); + + /* When optimizing for size, give a better estimate of the length of a + memcpy call, but use the default otherwise. Moves larger than 8 bytes + will always require an even number of instructions to do now. And each + operation requires both a load+store, so devide the max number by 2. 
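+ (With the values below this caps the expansion at 16/2 = 8 move pairs + when optimising for speed, doubled again when DOUBLE_LOAD_STORE wide + moves are available.)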
*/ + int max_num_moves = (speed_p ? 16 : ARC64_CALL_RATIO) / 2; + /* In case of double moves, double the threshold. */ + if (DOUBLE_LOAD_STORE) + max_num_moves *= 2; + + /* We can't do anything smart if the amount to copy is not constant. */ + if (!CONST_INT_P (operands[2])) + return false; + + n = INTVAL (operands[2]); + + /* Try to keep the number of instructions low. For all cases we will do at + most two moves for the residual amount, since we'll always overlap the + remainder. */ + const int divisor = GET_MODE_SIZE (cpymem_copy_mode ()); + if (((n / divisor) + (n % divisor ? 2 : 0)) > max_num_moves) + return false; + + base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + base = copy_to_mode_reg (Pmode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + /* Convert n to bits to make the rest of the code simpler. */ + n = n * BITS_PER_UNIT; + + while (n > 0) + { + cur_mode = cpymem_copy_mode_for_n (n); + + mode_bits = GET_MODE_BITSIZE (cur_mode); + arc64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode); + + n -= mode_bits; + + /* Do certain trailing copies as overlapping if it's going to be + cheaper. i.e. less instructions to do so. For instance doing a 15 + byte copy it's more efficient to do two overlapping 8 byte copies than + 8 + 4 + 2 + 1. */ + if (n > 0 && n < (BITS_PER_UNIT * divisor)) + { + int n_bits = cpymem_smallest_bigger_mode_bitsize (n); + src = arc64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT); + dst = arc64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT); + n = n_bits; + } + } + + return true; +} + +/* Provide a mapping from gcc register numbers to dwarf register numbers. */ +unsigned +arc64_dbx_register_number (unsigned regno) +{ + if (GP_REGNUM_P (regno)) + return regno; + else if (FP_REGNUM_P (regno)) + return 128 + regno - F0_REGNUM; + + /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no + equivalent DWARF register. */ + return DWARF_FRAME_REGISTERS; +} + +#if 0 +/* Expand fp vector shift right pattern. Can handle maximum 128bit + SIMD vectors. + + +----+----+----+----+----+----+----+----+ + | h7 | h6 | h5 | h4 | h3 | h2 | h1 | h0 | + | s3 | s2 | s1 | s0 | + | d1 | d0 | + +----+----+----+----+----+----+----+----+ + + */ + +bool +arc64_expand_fvect_shr (rtx *operands) +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx t0; + machine_mode mode = GET_MODE (op0); + scalar_int_mode imode = int_mode_for_mode (mode).require (); + unsigned int ival = INTVAL (op2); + + if (ARC64_VFP_128 && (ival == 64)) + { + emit_move_insn (gen_lowpart (DFmode, op0), gen_highpart (DFmode, op1)); + return true; + } + else if (ARC64_VFP_64 && (ival == 32)) + { + t0 = gen_reg_rtx (SFmode); + + emit_insn (gen_vec_extractv2sfsf (t0, + gen_lowpart (V2SFmode, op1), + GEN_INT (1))); + emit_insn (gen_vec_setv2sf (gen_lowpart (V2SFmode, op0), + t0, GEN_INT (0))); + return true; + } + else if (ARC64_VFP_32 && (ival == 16)) + { + t0 = gen_reg_rtx (HFmode); + + emit_insn (gen_vec_extractv2hfhf (t0, op1, GEN_INT (1))); + emit_insn (gen_vec_setv2hf (op0, t0, GEN_INT (0))); + return true; + } + + t0 = gen_reg_rtx (imode); + rtx shift = expand_binop (imode, lshr_optab, + gen_lowpart (imode, op1), op2, + NULL_RTX, true, OPTAB_DIRECT); + emit_move_insn (t0, shift); + emit_move_insn (op0, gen_lowpart (mode, t0)); + return true; +} +#endif + +/* Return TRUE if SYM requires a PLT34 reloc. 
The instruction is + valid, hence any symbol which its type is LPIC is valid for + instruction, see arc64_is_long_call_p. */ + +bool +arc64_use_plt34_p (rtx sym) +{ + return (arc64_get_symbol_type (sym) == ARC64_LPIC); +} + +/* Determine if it's legal to put X into the constant pool. By all means, it is + not ok to put a symbol in a constant pool. We arive here in the case of a + TLS symbol which needs to be precomputed. We force this in + legitimize_constant_p. */ + +static bool +arc64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, + rtx x) +{ + return contains_symbol_ref_p (x) || tls_referenced_p (x); +} + +/* Generate RTL for conditional branch with rtx comparison CODE in mode + CC_MODE. */ + +void +arc64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, + rtx label_ref) +{ + rtx x; + x = gen_rtx_fmt_ee (code, VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM), + const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, label_ref), + pc_rtx); + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + +/* True if the dependency between OUT_INSN and IN_INSN is on the accumulator + register. IN_INSN is a mac type of instruction. */ + +int +accumulator_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +{ + rtx in_set = single_set (in_insn); + rtx out_set = single_set (out_insn); + + if (!in_set || !out_set) + return false; + + if (!REG_P (SET_DEST (out_set)) || (REGNO (SET_DEST (out_set)) != R58_REGNUM)) + return false; + + rtx tmp = SET_SRC (in_set); + if (GET_CODE (tmp) == PLUS && GET_CODE (XEXP (tmp, 0)) == MULT) + return true; + return true; +} + +/* True if IN_INSN is setting the accumulator. */ + +int +set_accumulator_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn) +{ + rtx in_set = single_set (in_insn); + if (!in_set) + return false; + + if (REG_P (SET_DEST (in_set)) && (REGNO (SET_DEST (in_set)) == R58_REGNUM)) + return true; + return false; +} + +/* Return 'return' instruction. */ + +const char * +arc64_output_return (void) +{ + if (ARC_NAKED_P (cfun->machine->fn_type)) + return ""; + + return "j_s%*\t[blink]"; +} + +/* Return nonzero if register FROM_REGNO can be renamed to register + TO_REGNO. */ + +bool +arc64_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED, + unsigned to_regno) +{ + /* Interrupt functions can only use registers that have already been saved by + the prologue, even if they would normally be call-clobbered. */ + return (!ARC_INTERRUPT_P (cfun->machine->fn_type) + || df_regs_ever_live_p (to_regno)); +} + +/* Emit the RTX necessary to initialize the vector TARGET with values in + VALS. 
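+
+   As an illustration, a V4HImode constructor {a, b, c, d} is expanded by
+   the code below into two vector packs, {a, b} and {c, d}, which are then
+   merged into TARGET with a lane-select pattern; V2SImode and V2HImode
+   constructors need only a single pack.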
*/ + +void +arc64_expand_vector_init (rtx target, rtx vals) +{ + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int i; + rtx elem[4], tmp[2]; + + gcc_assert (n_elts <= 4); + for (i = 0; i < n_elts; i++) + { + elem[i] = XVECEXP (vals, 0, i); + if (!register_operand (elem[i], GET_MODE (elem[i]))) + elem[i] = force_reg (inner_mode, elem[i]); + } + + switch (mode) + { + case V4HImode: + tmp[0] = gen_reg_rtx (mode); + tmp[1] = gen_reg_rtx (mode); + emit_insn (gen_arc64_vpack_v4hihi (tmp[0], elem[0], elem[1])); + emit_insn (gen_arc64_vpack_v4hihi (tmp[1], elem[2], elem[3])); + emit_insn (gen_arc64_sel_lane2_0v4hi (target, tmp[0], tmp[1])); + break; + + case V2SImode: + emit_insn (gen_arc64_vpack_v2sisi (target, elem[0], elem[1])); + break; + + case V2HImode: + emit_insn (gen_arc64_vpack_v2hihi (target, elem[0], elem[1])); + break; + + default: + gcc_unreachable (); + } +} + +/* Target hooks. */ + +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arc64_output_mi_thunk + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE arc64_can_eliminate + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED arc64_frame_pointer_required + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P arc64_legitimate_address_p + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P arc64_legitimate_constant_p + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arc64_return_in_memory + +/* Passing arguments. 
*/ +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arc64_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arc64_setup_incoming_varargs + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arc64_function_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P arc64_function_value_regno_p + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG arc64_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE arc64_function_arg_advance + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arc64_arg_partial_bytes + +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + +#undef TARGET_COMPUTE_FRAME_LAYOUT +#define TARGET_COMPUTE_FRAME_LAYOUT arc64_compute_frame_info + +#undef TARGET_HARD_REGNO_NREGS +#define TARGET_HARD_REGNO_NREGS arc64_hard_regno_nregs + +#undef TARGET_HARD_REGNO_MODE_OK +#define TARGET_HARD_REGNO_MODE_OK arc64_hard_regno_mode_ok + +#undef TARGET_MODES_TIEABLE_P +#define TARGET_MODES_TIEABLE_P arc64_modes_tieable_p + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND arc64_print_operand + +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS arc64_print_operand_address + +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arc64_print_operand_punct_valid_p + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT arc64_initialize_trampoline + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc64_asm_trampoline_template + +#undef TARGET_HAVE_SPECULATION_SAFE_VALUE +#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arc64_function_ok_for_sibcall + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS arc64_init_libfuncs + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE arc64_output_function_prologue + +#undef TARGET_CONSTANT_ALIGNMENT +#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote + +/* To be checked if it is better without it. */ +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arc64_output_addr_const_extra + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arc64_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN arc64_expand_builtin + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL arc64_builtin_decl + +/* Having TLS support, we turn R30 fixed as well. 
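+   (R30 doubles as the thread pointer in the register layout described in
+   arc64.h, which is why ARC64_TLS_REGNO marks it as fixed whenever the
+   assembler provides TLS support.)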
*/ +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS +#endif + +#undef TARGET_LRA_P +#define TARGET_LRA_P hook_bool_void_true + +#undef TARGET_INSN_COST +#define TARGET_INSN_COST arc64_insn_cost + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arc64_reorg + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE arc64_conditional_register_usage + +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ +arc64_libgcc_floating_mode_supported_p + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P arc64_scalar_mode_supported_p + +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG arc64_split_complex_arg + +/* Vectors. */ +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arc64_vector_mode_supported_p + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc64_preferred_simd_mode + +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + arc64_autovectorize_vector_modes + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + arc64_builtin_vectorization_cost + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST arc64_register_move_cost + +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST arc64_vectorize_vec_perm_const + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS arc64_rtx_costs_wrapper + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arc64_address_cost + +/* Scheduling. */ +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P arc64_macro_fusion_p + +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P arc64_macro_fusion_pair_p + +/* Disable the speculation when filling delay slots. In general we get better + (speed) results but not for EEMBC's text01 benchmark. Disabling delay slot + filler speculation is needed to conserve the loops body size as calculated in + machine reorg phase. More info see github issue#416. */ +#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P +#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook_bool_void_true + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arc64_cannot_force_const_mem + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE arc64_override_options + +/* CC regs optimizations. 
*/ +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS arc64_fixed_condition_code_regs + +#undef TARGET_FLAGS_REGNUM +#define TARGET_FLAGS_REGNUM CC_REGNUM + +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION arc64_set_current_function + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arc64_attribute_table + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arc64_allocate_stack_slots_for_args + +#undef TARGET_WARN_FUNC_RETURN +#define TARGET_WARN_FUNC_RETURN arc64_warn_func_return + +#undef TARGET_CAN_FOLLOW_JUMP +#define TARGET_CAN_FOLLOW_JUMP arc64_can_follow_jump + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE arc64_sched_issue_rate + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-arc64.h" diff --git a/gcc/config/arc64/arc64.h b/gcc/config/arc64/arc64.h new file mode 100644 index 0000000000000..e95bf2037251a --- /dev/null +++ b/gcc/config/arc64/arc64.h @@ -0,0 +1,736 @@ +/* Machine description for ARC64 architecture. + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_ARC64_H +#define GCC_ARC64_H + +/* Bits are always numbered from the LSBit. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN 0 + +/* Is the 64bit or 32bit variant of the CPU used? */ +#define TARGET_64BIT arc64_target_64bit + +/* Determine TARGET_ARCH64 in all possible cases. */ +#ifdef IN_LIBGCC2 +#if defined(__ARC64_ARCH64__) +#define TARGET_ARCH64 1 +#else +#define TARGET_ARCH64 0 +#endif +#else /* not IN_LIBGCC2 */ +#define TARGET_ARCH64 TARGET_64BIT +#endif + +#define MAX_BITS_PER_WORD 64 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD (TARGET_ARCH64 ? 8 : 4) +#ifndef IN_LIBGCC2 +#define MIN_UNITS_PER_WORD 4 +#endif + +/* Width of a fp register, in bytes. */ +#define UNITS_PER_FP_REG ((arc64_fp_model == 2) ? 8 : 4) + +/* Maximum number of registers that can appear in a valid memory + address. N.B. The ld insn allows 2, but the st insn only allows + 1. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* Addressing modes. */ +#define HAVE_PRE_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_REG 1 +#define HAVE_POST_MODIFY_REG 1 + +/* The number of registers used for parameter passing. Local to this + file. */ +#define MAX_ARC64_PARM_REGS 8 + +/* 1 if N is a possible register number for function argument + passing. */ +/* Hard floats: r0-r7, and f0-f7. 
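+   As an illustrative example (assuming the separate integer and FP
+   argument counters kept in CUMULATIVE_ARGS), a call such as
+   f (int a, double b, int c) would be expected to pass A in r0, B in f0
+   and C in r1 when ARC64_HAS_FP_BASE holds; without hardware FP all
+   three arguments go in r registers.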
*/ +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), R0_REGNUM, R7_REGNUM) \ + || (ARC64_HAS_FP_BASE && IN_RANGE ((N), F0_REGNUM, F7_REGNUM))) + +/* Boundaries. */ +#define PARM_BOUNDARY BITS_PER_WORD +#define STACK_BOUNDARY POINTER_SIZE +#define FUNCTION_BOUNDARY 32 +#define EMPTY_FIELD_BOUNDARY 32 +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* Look at the fundamental type that is used for a bit-field and use + that to impose alignment on the enclosing structure. struct s {int + a:8}; should have same alignment as "int", not "char". */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Alignments. */ +#define FASTEST_ALIGNMENT BITS_PER_WORD +/* pr64242.c is one interesting test which changing BIGGEST_ALIGNMENT triggers + errors. */ +#define BIGGEST_ALIGNMENT BITS_PER_WORD +#define ARC64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < FASTEST_ALIGNMENT) \ + && (TREE_CODE (EXP) == ARRAY_TYPE)) ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Align global data. */ +#define DATA_ALIGNMENT(EXP, ALIGN) \ + ARC64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN) + +/* Similarly, make sure that objects on the stack are sensibly + aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + ARC64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT (!unaligned_access) + +/* Default unaligned accesses. */ +#ifndef UNALIGNED_ACCESS_DEFAULT +#define UNALIGNED_ACCESS_DEFAULT 0 +#endif + +/* Layout of Source Language Data Types. */ +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define POINTER_SIZE (TARGET_ARCH64 ? 64 : 32) +#define LONG_TYPE_SIZE POINTER_SIZE + +/* Defined for convenience. */ +#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Defined by ABI. */ +#define WCHAR_TYPE "int" +#define WCHAR_TYPE_SIZE 32 + +#define DEFAULT_SIGNED_CHAR 0 + +#undef SIZE_TYPE +#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int") + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") + +/* Specify the machine mode that the hardware addresses have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ + +#define Pmode word_mode + +/* Mode of a function address in a call instruction (for indexing purposes). */ +#define FUNCTION_MODE Pmode + +#ifdef HAVE_AS_TLS +#define ARC64_TLS_REGNO 1 +#else +#define ARC64_TLS_REGNO 0 +#endif + +/* Register usage: + R0-R3 Parameter/result registers + R4-R7 Parameter registers + R8-R13 Temporary registers + R14-R26 Callee-saved registers + R27 FP (frame pointer) + R28 SP (stack pointer) + R29 ILINK (Interrupt link register) + R30 GP/TP Global pointer, also it is used as thread pointer; + otherwise can be used as a temporary register. + R31 BLINK (return register) + R32-R57 Extension registers + R58 ACC (accumulator) + R59 Reserved + --- Special registers --- + R60 sign-extended 32-bit indicator + R61 Reserved + R62 zero extended 32-bit immediate indicator + R63 PCL (program counter) + --- Floating point registers --- + F0 Parameter/result register + F1-F7 Parameter registers + F8-F13 Temporary registers + F14-F31 Callee-saved registers + -- Fake registers -- + AP Argument pointer + SFP Soft frame pointer + CC Status register. 
+ */ + +/* 1 for registers that are not available for the register + allocator. */ +#define FIXED_REGISTERS \ + { \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R0 - R7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R8 - R15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \ + 0, 0, 0, 0, 1, 1, ARC64_TLS_REGNO, 1, /* R24 - R26, FP, SP, ILINK, R30, BLINK */ \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R32 - R39 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R40 - R47 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R48 - R55 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R56, R57, ACCL, R59, Specials */ \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - F7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F8 - F15 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F16 - F23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F24 - F31 */ \ + \ + 1, 1, 1, /* AP, SFP, CC */ \ + } + +/* 1 for registers not available across function calls. */ +#define CALL_USED_REGISTERS \ + { \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \ + 1, 1, 1, 1, 1, 1, 0, 0, /* R8 - R15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \ + 0, 0, 0, 0, 1, 1, 1, 1, /* R24 - R26, FP, SP, ILINK, R30, BLINK */ \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R32 - R39 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R40 - R47 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R48 - R55 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R56, R57, ACCL, R59, Specials */ \ + \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - F7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F8 - F15 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F16 - F23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* F24 - F31 */ \ + \ + 1, 1, 1, /* AP, SFP, CC */ \ + } + +#define REGISTER_NAMES \ + { \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "r26", "r27", "sp", "ilink", "r30", "blink", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + "r56", "r57", "r58", "r59", "ximm", "rez", "limm", "pcl", \ + \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ + "ap", "sfp", "cc", \ + } + +#define ADDITIONAL_REGISTER_NAMES \ + { \ + { "fp", 27 }, \ + { "gp", 30 }, \ + { "acc", 58 }, \ + } + +#define EPILOGUE_USES(REGNO) (arc64_epilogue_uses (REGNO)) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. This is only true if the function + uses alloca. 
*/ +#define EXIT_IGNORE_STACK (cfun->calls_alloca) + +#define STATIC_CHAIN_REGNUM R11_REGNUM +#define HARD_FRAME_POINTER_REGNUM R27_REGNUM +#define FRAME_POINTER_REGNUM SFP_REGNUM +#define STACK_POINTER_REGNUM SP_REGNUM +#define ARG_POINTER_REGNUM AP_REGNUM +#define FIRST_PSEUDO_REGISTER (CC_REGNUM + 1) + +enum reg_class +{ + NO_REGS, + AC16_REGS, + SIBCALL_REGS, + CORE_REGS, + GENERAL_REGS, + FP_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "AC16_REGS", \ + "SIBCALL_REGS", \ + "CORE_REGS", \ + "GENERAL_REGS", \ + "FP_REGS", \ + "ALL_REGS" \ +} + +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x0000f00f, 0x00000000, 0x00000000, 0x00000000 }, /* AC16_REGS */ \ + { 0x00001fff, 0x00000000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \ + { 0x0000ffff, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \ + { 0xdfffffff, 0x0fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ + { 0x00000000, 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000007 }, /* ALL_REGS */ \ +} + +/* A C expression whose value is a register class containing hard + register REGNO. In general there is more that one such class; + choose a class which is "minimal", meaning that no smaller class + also contains the register. */ + +#define REGNO_REG_CLASS(REGNO) arc64_regno_to_regclass[ (REGNO) ] + +/* A C expression that is nonzero if it is OK to rename a hard register FROM to + another hard register TO. */ + +#define HARD_REGNO_RENAME_OK(FROM, TO) arc64_hard_regno_rename_ok (FROM, TO) + +/* The class value for valid base registers. A base register is one used in + an address which is the register value plus a displacement. */ + +#define BASE_REG_CLASS GENERAL_REGS +#define INDEX_REG_CLASS GENERAL_REGS + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARC. First, the + argument pointer register can always be eliminated in favor of the stack + pointer register or frame pointer register. Secondly, the frame pointer + register can often be eliminated in favor of the stack pointer register. +*/ + +#define ELIMINABLE_REGS \ + { \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM } \ + } + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = arc64_initial_elimination_offset (FROM, TO) + +/* RTL generation support. */ +#define INIT_EXPANDERS arc64_init_expanders () + +/* Stack layout; function entry, exit and calling. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Addresses of local variables slots are at negative offsets from the + frame pointer. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed onto the stack + for each call; instead, the function prologue should increase the + stack frame size by this amount. 
*/
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset of first parameter from the argument pointer register
+   value.  */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define how to find the value returned by a library function
+   assuming the value has mode MODE.  */
+#define LIBCALL_VALUE(MODE) \
+  gen_rtx_REG (MODE, arc64_use_fp_regs (MODE) ? F0_REGNUM : R0_REGNUM)
+
+/* Tell GCC to use RETURN_IN_MEMORY.  */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* To be checked: WORD_REGISTER_OPERATIONS; ARC64 has 32-bit
+   operations.  */
+#define WORD_REGISTER_OPERATIONS 1
+
+/* Define if loading from memory in MODE, an integral mode narrower than
+   BITS_PER_WORD will either zero-extend or sign-extend.  The value of this
+   macro should be the code that says which one of the two operations is
+   implicitly done, or UNKNOWN if none.  */
+#define LOAD_EXTEND_OP(MODE) (((MODE) == SImode) ? SIGN_EXTEND : ZERO_EXTEND)
+
+/* Enable wide bitfield accesses for more efficient bitfield code.  */
+#define SLOW_BYTE_ACCESS 1
+
+#define NO_FUNCTION_CSE 1
+
+/* Conditional info.  */
+#define SELECT_CC_MODE(OP, X, Y) arc64_select_cc_mode (OP, X, Y)
+
+/* Restrictions apply to floating-point comparisons.  */
+#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CC_FPUmode && (MODE) != CC_FPUEmode)
+
+/* Returning.  */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, BLINK_REGNUM)
+
+#define RETURN_ADDR_RTX arc64_return_addr
+
+/* Define this to be nonzero if shift instructions ignore all but the
+   low-order few bits.  */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Defines if the CLZ result is undefined or has a useful value.  */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+  ((VALUE) = GET_MODE_SIZE((MODE)) * BITS_PER_UNIT - 1, 2)
+
+/* Defines if the CTZ result is undefined or has a useful value.  */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+  ((VALUE) = GET_MODE_SIZE((MODE)) * BITS_PER_UNIT - 1, 2)
+
+/* Function argument passing.  */
+
+/* Define a data type for recording info about an argument list during
+   the scan of that argument list.  This data type should hold all
+   necessary information about the function itself and about the args
+   processed so far, enough to enable macros such as FUNCTION_ARG to
+   determine where the next arg should go.  */
+#define CUMULATIVE_ARGS struct arc64_args
+struct arc64_args
+{
+  /* Number of integer registers used so far.  */
+  int iregs;
+
+  /* Number of floating-point registers used so far.  */
+  int fregs;
+};
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+   for a call to a function whose data type is FNTYPE.
+   For a library call, FNTYPE is 0.  */
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
+  ((CUM).iregs = 0, (CUM).fregs = 0)
+
+/* An integer expression for the size in bits of the largest integer machine
+   mode that should actually be used.  We allow pairs of registers.  */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_ARCH64 ? TImode : DImode)
+
+/* Maximum bytes moved by a single instruction (load/store pair).  */
+#define MOVE_MAX (2*UNITS_PER_WORD)
+#define MAX_MOVE_MAX 16
+
+/* The base cost overhead of a memcpy call, for MOVE_RATIO and friends.  */
+#define ARC64_CALL_RATIO 8
+
+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure.
+   move_by_pieces will continually copy the largest safe chunks.  So a
+   7-byte copy is a 4-byte + 2-byte + byte copy.  This proves inefficient
+   for both size and speed of copy, so we will instead use the "cpymem"
+   standard name to implement the copy.
This logic does not apply when + targeting -mstrict-align, so keep a sensible default in that case. */ +#define MOVE_RATIO(speed) \ + (!STRICT_ALIGNMENT ? 2 : ((speed) ? 15 : ARC64_CALL_RATIO)) + +#ifndef USED_FOR_TARGET +extern const enum reg_class arc64_regno_to_regclass[]; +#endif + +#define SIGNED(X,V) \ + ((unsigned long long) ((X) + (1ULL << (V - 1))) < (1ULL << V)) +#define UNSIGNED(X,V) ((unsigned long long) (X) < (1ULL << V)) +#define VERIFY_SHIFT(X,S) ((X & ((1 << S) - 1)) == 0) + +#define UNSIGNED_INT3(X) (UNSIGNED(X,3)) +#define UNSIGNED_INT5(X) (UNSIGNED(X,5)) +#define UNSIGNED_INT6(X) (UNSIGNED(X,6)) +#define UNSIGNED_INT7(X) (UNSIGNED(X,7)) +#define UNSIGNED_INT8(X) (UNSIGNED(X,8)) +#define UNSIGNED_INT9(X) (UNSIGNED(X,9)) +#define UNSIGNED_INT10(X) (UNSIGNED(X,10)) +#define UNSIGNED_INT12(X) (UNSIGNED(X,12)) +#define UNSIGNED_INT16(X) (UNSIGNED(X,16)) +// TODO: Fix for 32 bit compiler host architecture. +#define UNSIGNED_INT32(X) (UNSIGNED(X,32)) + +#define SIGNED_INT3(X) (SIGNED(X,3)) +#define SIGNED_INT6(X) (SIGNED(X,6)) +#define SIGNED_INT7(X) (SIGNED(X,7)) +#define SIGNED_INT8(X) (SIGNED(X,8)) +#define SIGNED_INT9(X) (SIGNED(X,9)) +#define SIGNED_INT10(X) (SIGNED(X,10)) +#define SIGNED_INT11(X) (SIGNED(X,11)) +#define SIGNED_INT12(X) (SIGNED(X,12)) +#define SIGNED_INT13(X) (SIGNED(X,13)) +#define SIGNED_INT16(X) (SIGNED(X,16)) +#define SIGNED_INT21(X) (SIGNED(X,21)) +#define SIGNED_INT25(X) (SIGNED(X,25)) + +// TODO: Fix for 32 bit compiler host architecture. +#define SIGNED_INT32(X) (SIGNED(X,32)) + +#define UNSIGNED_INT7_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S)) +#define UNSIGNED_INT8_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S)) +#define UNSIGNED_INT9_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S)) + +#define SIGNED_INT13_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S)) +#define SIGNED_INT14_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S)) +#define SIGNED_INT15_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S)) + +#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X)) + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + (GP_REGNUM_P (REGNO) \ + || ((REGNO) == AP_REGNUM) \ + || ((REGNO) == SFP_REGNUM)) + +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* Return true if regno is FP register. */ +#define FP_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - F0_REGNUM)) <= (F31_REGNUM - F0_REGNUM)) + +#define GP_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - R0_REGNUM)) <= (BLINK_REGNUM - R0_REGNUM)) + +/* Trampolines, used for entering nested functions, are a block of code + followed by two pointers. The sizes here are in bytes. */ +#define TRAMPOLINE_CODE_SIZE \ + ((Pmode == SImode) \ + ? 8 /* ld_s, ld, j_s */ \ + : 16) /* nop, ldl, ldl, j */ +#define TRAMPOLINE_SIZE (TRAMPOLINE_CODE_SIZE + 2 * POINTER_BYTES) +/* Alignment required for a trampoline in bits . */ +#define TRAMPOLINE_ALIGNMENT POINTER_SIZE + +/* Names to predefine in the preprocessor for this target machine. */ +#define TARGET_CPU_CPP_BUILTINS() arc64_cpu_cpp_builtins (pfile) + +/* Dispatch tables. 
*/ +#define JUMP_TABLES_IN_TEXT_SECTION 1 +#define CASE_VECTOR_MODE SImode +#define CASE_VECTOR_PC_RELATIVE 1 +#define ADDR_VEC_ALIGN(VEC_INSN) 0 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + (MODE) = SImode; \ + } + + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +#define ASM_COMMENT_START "#" + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + fprintf(FILE, "\t.align\t%d\n", 1 << (int)LOG) + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#undef ASM_APP_ON +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#undef ASM_APP_OFF +#define ASM_APP_OFF "" + +/* This is how to output a reference to a symbol_ref / label_ref as + (part of) an operand. To disambiguate from register names like a1 + / a2 / status etc, symbols are preceded by '@'. */ +#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \ + ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0)) +#define ASM_OUTPUT_LABEL_REF(FILE,STR) \ + do \ + { \ + fputs ("@", (FILE)); \ + assemble_name ((FILE), (STR)); \ + } \ + while (0) + +#define LOCAL_LABEL_PREFIX "." + +/* This is how to output an element of a PIC case-vector. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + fprintf (STREAM, "\tb\t@%sL%d\n", \ + LOCAL_LABEL_PREFIX, VALUE) + +/* Defined to also emit an .align in elfos.h. We don't want that. */ +#undef ASM_OUTPUT_CASE_LABEL + +/* Section selection. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata" +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss" + +/* Expression whose value is a string, including spacing, containing + the assembler operation to identify the following data as + initialization/termination code. If not defined, GCC will assume + such a section does not exist. */ +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* All the work done in PROFILE_HOOK, but still required. */ +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + +#define NO_PROFILE_COUNTERS 1 + +/* Tell crtstuff.c we're using ELF. */ +#define OBJECT_FORMAT_ELF + +/* Called by crtstuff.c to make calls to function FUNCTION that are defined in + SECTION_OP, and then to switch back to text section. */ +#undef CRT_CALL_STATIC_FUNCTION +#ifdef __ARC64_ARCH32__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t" \ + "mov\tr12,@" USER_LABEL_PREFIX #FUNC "\n\t" \ + "jl\t[r12]\n" \ + TEXT_SECTION_ASM_OP); +#elif (defined __ARC64_ARCH64__) +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t" \ + "addl\tr12,pcl,@" USER_LABEL_PREFIX #FUNC "@pcl\n\t" \ + "jl\t[r12]\n" \ + TEXT_SECTION_ASM_OP); +#endif + +/* ATOMIC options. 
*/
+/* FIXME: is 0 okay or should it be -1 like DEFAULT_arc_mpy_option?  */
+/* Default atomic option value.  */
+#undef DEFAULT_ARC64_ATOMIC_OPTION
+#define DEFAULT_ARC64_ATOMIC_OPTION 1
+
+#define ARC64_HAS_ATOMIC_1 (arc64_atomic_option > 0)
+#define ARC64_HAS_ATOMIC_2 (arc64_atomic_option > 1)
+#define ARC64_HAS_ATOMIC_3 (arc64_atomic_option > 2)
+
+/* DIVREM options.  */
+#undef TARGET_ARC64_DIVREM_DEFAULT
+#define TARGET_ARC64_DIVREM_DEFAULT 1
+
+/* FP options.  */
+#define ARC64_HAS_FP_BASE (arc64_fp_model > 0)
+#define ARC64_HAS_FPUH (arc64_fp_model > 0)
+#define ARC64_HAS_FPUS (arc64_fp_model > 0)
+#define ARC64_HAS_FPUD (arc64_fp_model > 1)
+
+#define TARGET_HARD_FLOAT ARC64_HAS_FP_BASE
+
+/* Vector SIMD length.  */
+#define ARC64_VFP_32 (arc64_fp_model == 1)
+#define ARC64_VFP_64 ((arc64_fp_model == 2) && !TARGET_WIDE_SIMD)
+#define ARC64_VFP_128 ((arc64_fp_model == 2) && TARGET_WIDE_SIMD)
+
+/* IFCVT macros.  */
+#define STORE_FLAG_VALUE 1
+#define MAX_CONDITIONAL_EXECUTE 12
+#define BRANCH_COST(speed_p, predictable_p) 10
+
+/* DWARF macros.  */
+#define DWARF2_DEBUGGING_INFO 1
+/* The mapping from gcc register number to DWARF2 CFA column number.  */
+#define DWARF_FRAME_REGNUM(REGNO) DBX_REGISTER_NUMBER(REGNO)
+/* DWARF2 CFA column which tracks the return address.  */
+#define DWARF_FRAME_RETURN_COLUMN BLINK_REGNUM
+/* DWARF registers encodings.  */
+#define DBX_REGISTER_NUMBER(REGNO) arc64_dbx_register_number (REGNO)
+/* The DWARF 2 CFA column which tracks the return address from a signal handler
+   context.  This value must not correspond to a hard register and must be out
+   of the range of DWARF_FRAME_REGNUM().  The unwind-dw2.c file is using
+   DWARF_REG_TO_UNWIND_COLUMN and DWARF_FRAME_REGISTERS macros.  The
+   DWARF_FRAME_REGNUM macro returns no equivalent DWARF register for
+   AP_REGNUM.  Thus, we should be safe using AP_REGNUM.  */
+#define DWARF_ALT_FRAME_RETURN_COLUMN AP_REGNUM
+
+/* Exception Handling support.  */
+/* Use R0 through R3 to pass exception handling information.  */
+#define EH_RETURN_DATA_REGNO(N) \
+  ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX arc64_eh_return_handler_rtx ()
+#define EH_USES(REGNO) (arc64_eh_uses((REGNO)))
+
+/* Select a format to encode pointers in exception handling data.  */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+  arc64_asm_preferred_eh_data_format ((CODE), (GLOBAL))
+
+/* Specs.  */
+
+/* Support for a compile-time default CPU or FPU.
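+
+   For example, a toolchain configured with --with-cpu=<cpu> makes the
+   driver pass -mcpu=<cpu> by default; the "%{!mcpu=*:-mcpu=%(VALUE)}"
+   spec below ensures that an explicit -mcpu= on the command line still
+   takes precedence.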
*/ +#define OPTION_DEFAULT_SPECS \ + { "fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \ + { "cpu", "%{!mcpu=*:-mcpu=%(VALUE)}"} + +#define CPP_SPEC "%(subtarget_cpp_spec)" + +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{mcpu=*:-mcpu=%*}" + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +#undef ARC64_SUBTARGET_DEFAULT +#define ARC64_SUBTARGET_DEFAULT 0 + +#endif /* GCC_ARC64_H */ diff --git a/gcc/config/arc64/arc64.md b/gcc/config/arc64/arc64.md new file mode 100644 index 0000000000000..1eaee6fb05277 --- /dev/null +++ b/gcc/config/arc64/arc64.md @@ -0,0 +1,3238 @@ +;; Register numbers +(define_constants + [ + (R0_REGNUM 0) + (R1_REGNUM 1) + (R2_REGNUM 2) + (R3_REGNUM 3) + (R4_REGNUM 4) + (R5_REGNUM 5) + (R6_REGNUM 6) + (R7_REGNUM 7) + (R8_REGNUM 8) + (R9_REGNUM 9) + (R10_REGNUM 10) + (R11_REGNUM 11) + (R12_REGNUM 12) + (R13_REGNUM 13) + (R14_REGNUM 14) + (R15_REGNUM 15) + (R16_REGNUM 16) + (R17_REGNUM 17) + (R18_REGNUM 18) + (R19_REGNUM 19) + (R20_REGNUM 20) + (R21_REGNUM 21) + (R22_REGNUM 22) + (R23_REGNUM 23) + (R24_REGNUM 24) + (R25_REGNUM 25) + (R26_REGNUM 26) + (R27_REGNUM 27) + (SP_REGNUM 28) + (ILINK_REGNUM 29) + (R30_REGNUM 30) + (BLINK_REGNUM 31) + (R32_REGNUM 32) + (R33_REGNUM 33) + (R34_REGNUM 34) + (R35_REGNUM 35) + (R36_REGNUM 36) + (R37_REGNUM 37) + (R38_REGNUM 38) + (R39_REGNUM 39) + (R40_REGNUM 40) + (R41_REGNUM 41) + (R42_REGNUM 42) + (R43_REGNUM 43) + (R44_REGNUM 44) + (R45_REGNUM 45) + (R46_REGNUM 46) + (R47_REGNUM 47) + (R48_REGNUM 48) + (R49_REGNUM 49) + (R50_REGNUM 50) + (R51_REGNUM 51) + (R52_REGNUM 52) + (R53_REGNUM 53) + (R54_REGNUM 54) + (R55_REGNUM 55) + (R56_REGNUM 56) + (R57_REGNUM 57) + (R58_REGNUM 58) + (R59_REGNUM 59) + + (R60_REGNUM 60) + (R61_REGNUM 61) + (R62_REGNUM 62) + (R63_REGNUM 63) + + (F0_REGNUM 64) + (F1_REGNUM 65) + (F2_REGNUM 66) + (F3_REGNUM 67) + (F4_REGNUM 68) + (F5_REGNUM 69) + (F6_REGNUM 70) + (F7_REGNUM 71) + (F8_REGNUM 72) + (F9_REGNUM 73) + (F10_REGNUM 74) + (F11_REGNUM 75) + (F12_REGNUM 76) + (F13_REGNUM 77) + (F14_REGNUM 78) + (F15_REGNUM 79) + (F16_REGNUM 80) + (F17_REGNUM 81) + (F18_REGNUM 82) + (F19_REGNUM 83) + (F20_REGNUM 84) + (F21_REGNUM 85) + (F22_REGNUM 86) + (F23_REGNUM 87) + (F24_REGNUM 88) + (F25_REGNUM 89) + (F26_REGNUM 90) + (F27_REGNUM 91) + (F28_REGNUM 92) + (F29_REGNUM 93) + (F30_REGNUM 94) + (F31_REGNUM 95) + + (AP_REGNUM 96) + (SFP_REGNUM 97) + (CC_REGNUM 98) + ] + ) + +(define_c_enum "unspec" + [ + ARC64_UNSPEC_PCREL + ARC64_UNSPEC_GOT + ARC64_UNSPEC_GOT32 + ARC64_UNSPEC_TLS_GD + ARC64_UNSPEC_TLS_IE + ARC64_UNSPEC_TLS_OFF + ARC64_VUNSPEC_BLOCKAGE + + ARC64_VUNSPEC_LR + ARC64_VUNSPEC_SR + ARC64_VUNSPEC_LRL + ARC64_VUNSPEC_SRL + ARC64_VUNSPEC_FLAG + ARC64_VUNSPEC_BRK + ARC64_VUNSPEC_NOP + ARC64_VUNSPEC_TRAP_S + + ARC64_VUNSPEC_EX + ARC64_VUNSPEC_CAS + ARC64_VUNSPEC_SC + ARC64_VUNSPEC_LL + ARC64_VUNSPEC_SYNC + ARC64_VUNSPEC_ATOOPS + ARC64_VUNSPEC_RTIE + + ARC64_UNSPEC_MEMBAR + ARC64_UNSPEC_FLS + ARC64_UNSPEC_COPYSIGN + ARC64_UNSPEC_XORSIGN + ARC64_UNSPEC_ROUND + ARC64_UNSPEC_BTRUNC + ARC64_UNSPEC_CASESI + ARC64_UNSPEC_VECINIT + ARC64_UNSPEC_QMPYH + ARC64_UNSPEC_QMACH + ARC64_UNSPEC_DMPYWH + ARC64_UNSPEC_DMPYWHU + ARC64_UNSPEC_DMACWH + ARC64_UNSPEC_DMACWHU + ARC64_UNSPEC_VPACK4HL + ARC64_UNSPEC_VPACK4HM + ARC64_UNSPEC_VPACK2WL + ARC64_UNSPEC_SWAPL + ARC64_UNSPEC_SWAP + ARC64_UNSPEC_VEC_SHR + ARC64_UNSPEC_VEC_SHL + ARC64_UNSPEC_HEXCH + 
ARC64_UNSPEC_SEXCH + ARC64_UNSPEC_DEXCH + ARC64_UNSPEC_HUNPKL + ARC64_UNSPEC_SUNPKL + ARC64_UNSPEC_DUNPKL + ARC64_UNSPEC_HUNPKM + ARC64_UNSPEC_SUNPKM + ARC64_UNSPEC_DUNPKM + ARC64_UNSPEC_HPACKL + ARC64_UNSPEC_SPACKL + ARC64_UNSPEC_DPACKL + ARC64_UNSPEC_HPACKM + ARC64_UNSPEC_SPACKM + ARC64_UNSPEC_DPACKM + ARC64_UNSPEC_HBFLYL + ARC64_UNSPEC_SBFLYL + ARC64_UNSPEC_DBFLYL + ARC64_UNSPEC_HBFLYM + ARC64_UNSPEC_SBFLYM + ARC64_UNSPEC_DBFLYM + ARC64_UNSPEC_VFADDSUB + ARC64_UNSPEC_VFSUBADD + ARC64_UNSPEC_VADDSUB + ARC64_UNSPEC_VSUBADD + ]) + +(include "constraints.md") +(include "predicates.md") + +;; ------------------------------------------------------------------- +;; Mode Iterators +;; ------------------------------------------------------------------- + +;; Iterator for General Purpose Integer registers (32- and 64-bit modes) +(define_mode_iterator GPI [SI (DI "TARGET_64BIT")]) + +;; For doubling width of an integer mode +(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) + +;; Iterator for QI and HI modes +(define_mode_iterator SHORT [QI HI]) + +;; Iterator for QI HI and SI modes +(define_mode_iterator EXT [QI HI SI]) + +;; Iterator for all integer modes (up to 64-bit) +(define_mode_iterator ALLI [QI HI SI (DI "TARGET_64BIT")]) +(define_mode_iterator MV_ALLI [QI HI SI (DI "TARGET_64BIT || TARGET_LL64")]) + +;; Iterator for HI SI and DI modes +(define_mode_iterator EPI [HI SI (DI "TARGET_64BIT")]) + +;; Iterator for HI and SI modes +(define_mode_iterator HI_SI [HI SI]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +;; Iterator for integer modes which map into a pair of registers. 
+(define_mode_iterator DBLI [DI (TI "TARGET_64BIT")])
+
+;; Iterator for General Purpose Floating-point registers (16-, 32-
+;; and 64-bit modes)
+(define_mode_iterator GPF_HF [(HF "ARC64_HAS_FPUH")
+			      (SF "ARC64_HAS_FPUS") (DF "ARC64_HAS_FPUD")])
+
+;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
+(define_mode_iterator GPF [(SF "ARC64_HAS_FPUS") (DF "ARC64_HAS_FPUD")])
+
+;; Iterator for General Purpose Floating-point registers (16- and 32-bit modes)
+(define_mode_iterator HF_SF [(HF "ARC64_HAS_FPUH") (SF "ARC64_HAS_FPUS")])
+
+;; All int vectors
+(define_mode_iterator VALL [V2HI V4HI V2SI])
+
+;; All 64b int vectors
+(define_mode_iterator V64I [V4HI V2SI])
+
+;; All fp vectors
+(define_mode_iterator VALLF [(V2HF "ARC64_VFP_32")
+			     (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")
+			     (V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")
+			     (V2DF "ARC64_VFP_128")])
+
+;; All fp vectors up to 64-bit
+(define_mode_iterator VALLF_64 [(V2HF "ARC64_VFP_32")
+				(V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")])
+
+;; All 128b fp vectors
+(define_mode_iterator VALLF_128 [(V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")
+				 (V2DF "ARC64_VFP_128")])
+
+;; All 2xfp Vectors
+(define_mode_iterator V2xF [(V2HF "ARC64_VFP_32") (V2SF "ARC64_VFP_64")
+			    (V2DF "ARC64_VFP_128")])
+
+;; All 4xfp Vectors
+(define_mode_iterator V4xF [(V4HF "ARC64_VFP_64") (V4SF "ARC64_VFP_128")])
+
+;; All 2xreg wide vectors
+;; All 2xfp Vectors
+(define_mode_iterator W2xF [(V2DF "ARC64_VFP_128")])
+
+;; All HF and SF vectors
+(define_mode_iterator V1FRF [(V2HF "ARC64_VFP_32")
+			     (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")
+			     (V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")])
+
+;; All HF vectors
+(define_mode_iterator VxHF [(V2HF "ARC64_VFP_32")
+			    (V4HF "ARC64_VFP_64")
+			    (V8HF "ARC64_VFP_128")])
+
+;; -------------------------------------------------------------------
+;; Code Iterators
+;; -------------------------------------------------------------------
+
+;; Code iterator for sign/zero extension
+(define_code_iterator ANY_EXTEND [sign_extend zero_extend])
+
+;; This code iterator allows the shifts supported in arithmetic instructions
+(define_code_iterator ASHIFT [ashift ashiftrt lshiftrt])
+
+;; Only logical shifts
+(define_code_iterator LSHIFT [ashift lshiftrt])
+
+;; Iterates over the SETcc instructions
+(define_code_iterator SETCC [eq ne gt lt ge le ltu geu])
+(define_code_iterator ALLCC [eq ne gt lt ge le ltu geu gtu leu])
+
+;; Three operand arithmetic operations
+(define_code_iterator ARITH [plus minus mult])
+(define_code_iterator ADDSUB [plus minus] )
+
+;; Three operand logic operations
+(define_code_iterator LOGIC [and ior xor smin smax])
+
+;; Two operand logic operations
+(define_code_iterator NOT_ABS [not abs])
+
+;; Two operand logic operations extended, used for zero_extend
+;; patterns
+(define_code_iterator LOP2EX [not abs neg])
+
+;; Min/Max iterator
+(define_code_iterator MINMAX [smin smax])
+
+;; Three operand floating point arithmetic instructions
+(define_code_iterator DOPF [plus minus mult div smin smax])
+
+;; Vector operations
+(define_code_iterator VOPS [plus minus mult div])
+
+;; Commutative VF operations
+(define_code_iterator VCOP [plus mult])
+
+;; Emulated 1 operand vector operations
+(define_code_iterator ABS_NEG [abs neg])
+
+;; Code iterator for unary negate and bitwise complement.
+(define_code_iterator NEG_NOT [neg not])
+
+;; Code iterator for bit logic ops.
+(define_code_iterator BIT [ior xor])
+
+;; Code iterator for div/mod ops.
+(define_code_iterator DIVREM [div udiv mod umod]) + +;; Comutative operations +(define_code_iterator COMMUTATIVE [and ior xor]) +(define_code_iterator COMMUTATIVEF [plus and ior xor]) + +;; ------------------------------------------------------------------- +;; Mode Attributes +;; ------------------------------------------------------------------- + +;; Map rtl mode to ARC mnemonic suffixes used in sign extend +;; instructions. +(define_mode_attr exttab [(QI "b") (HI "h") (SI "w")]) + +;; Map rtl mode to ARC mnemonic suffixes +(define_mode_attr sfxtab [(QI "b") (HI "h") (SI "") (DI "l") + (HF "h") (SF "s") (DF "d") + (V2HI "2h") (V4HI "4h") (V2SI "2") + (V2HF "h") (V4HF "h") (V2SF "s") + (V8HF "h") (V4SF "s") (V2DF "d")]) + +;; Used by FPABS patterns. +(define_mode_attr fptab [(SF "") (DF "l")]) + +;; Same as above but to be used by mov conditional +(define_mode_attr mcctab [(QI "") (HI "") (SI "") (DI "l") + (HF "") (SF "") (DF "l") + (V2HI "") (V4HI "l") (V2SI "l") + (V2HF "") (V4HF "l") (V2SF "l")]) + +(define_mode_attr slfp [(HF "h") (SF "") (DF "l") + (V2HF "") (V4HF "l") (V2SF "l")]) + +(define_mode_attr fmvftab [(HF "s") (SF "s") (DF "d") + (V2HF "s") (V4HF "d") (V2SF "d")]) +(define_mode_attr fmvitab [(HF "i") (SF "i") (DF "l") + (V2HF "i") (V4HF "l") (V2SF "l")]) + +;; To be used by vector exch instructions emitted by reduction +;; patterns. +(define_mode_attr fmextab [(V4HF "s") (V4SF "d")]) + +;; Used to implement cadd{90,270} functions +(define_mode_attr cplxtab [(V2HF "H") + (V4HF "H") + (V2SF "S") + (V8HF "H") + (V4SF "S") + (V2DF "D")]) + +;; Give the number of bits-1 in the mode +(define_mode_attr sizen [(QI "7") (HI "15") (SI "31") (DI "63") + (HF "15") (SF "31") (DF "63")]) + +;; Same like above but without -1 used for fp loads/stores +(define_mode_attr sizef [(HF "16") (SF "32") (DF "64") + (V2HF "32") (V4HF "64") (V2SF "64") + (V8HF "d64") (V4SF "d64") (V2DF "d64")]) + +;; Used to implement predicated sign extension patterns +(define_mode_attr sexsft [(QI "24") (HI "16") (SI "8")]) + +;; Used by float conv patterns. +(define_mode_attr f2tab [(SI "int") (DI "l")]) + +;; Define element mode for each vector mode. +(define_mode_attr VEL [(V2HI "HI") (V4HI "HI") (V2SI "SI") + (V2HF "HF") (V4HF "HF") (V2SF "SF") + (V8HF "HF") (V4SF "SF") (V2DF "DF")]) +(define_mode_attr vel [(V2HI "hi") (V4HI "hi") (V2SI "si") + (V2HF "hf") (V4HF "hf") (V2SF "sf") + (V8HF "hf") (V4SF "sf") (V2DF "df")]) + +;; Define element mode for each double-r mode. 
+(define_mode_attr REL [(DI "SI") (TI "DI")]) +(define_mode_attr rel [(DI "si") (TI "di")]) + +;; Used by vector extract pattern +(define_mode_attr vextrsz [(V2HI "16") (V4HI "16") (V2SI "32")]) +(define_mode_attr vextrmsk [(V2HI "0x1f") (V4HI "0x3f") (V2SI "0x3f")]) +(define_mode_attr vextrsh [(V2HI "5") (V4HI "6") (V2SI "6")]) + +;; ------------------------------------------------------------------- +;; Code Attributes +;; ------------------------------------------------------------------- +;; Map rtl objects to optab names +(define_code_attr optab [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") + (rotatert "rotr") + (sign_extend "extend") + (zero_extend "zero_extend") + (sign_extract "extv") + (zero_extract "extzv") + (fix "fix") + (unsigned_fix "fixuns") + (float "float") + (unsigned_float "floatuns") + (popcount "popcount") + (and "and") + (ior "ior") + (xor "xor") + (not "one_cmpl") + (neg "neg") + (plus "add") + (minus "sub") + (mult "mul") + (div "div") + (udiv "udiv") + (mod "mod") + (umod "umod") + (ss_plus "qadd") + (us_plus "qadd") + (ss_minus "qsub") + (us_minus "qsub") + (ss_neg "qneg") + (ss_abs "qabs") + (smin "smin") + (smax "smax") + (umin "umin") + (umax "umax") + (eq "eq") + (ne "ne") + (lt "lt") + (ge "ge") + (le "le") + (gt "gt") + (ltu "ltu") + (leu "leu") + (geu "geu") + (gtu "gtu") + (abs "abs") + (sqrt "sqrt")]) + +;; map rtl to ARC's cc-mnemonic names, slightly different than above. +(define_code_attr cctab [(eq "eq") + (ne "ne") + (lt "lt") + (ge "ge") + (le "le") + (gt "gt") + (ltu "lo") + (leu "NA") + (geu "hs") + (gtu "NA")]) + +;; used for inverting predicated SET instructions. +(define_code_attr CCTAB [(eq "EQ") + (ne "NE") + (lt "LT") + (ge "GE") + (le "LE") + (gt "GT") + (ltu "LTU") + (leu "NA") + (geu "GEU") + (gtu "NA")]) + +;; Sign- or zero-extend data-op +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) + +;; Optab prefix for sign/zero-extending operations +(define_code_attr su_optab [(sign_extend "") (zero_extend "u")]) + +;; Map rtl objects to arc instuction names +(define_code_attr mntab [(abs "abs") + (not "not") + (neg "neg") + (ashift "asl") + (ashiftrt "asr") + (sign_extend "sex") + (zero_extend "ext") + (div "div") + (udiv "divu") + (mult "mul") + (mod "rem") + (umod "remu") + (lshiftrt "lsr") + (and "and") + (ior "or") + (xor "xor") + (plus "add") + (minus "sub") + (smax "max") + (smin "min")]) + +;; Map rtl objects to arc's bit operation instructions +(define_code_attr bit_optab [(ior "bset") + (xor "bxor")]) + +;; ------------------------------------------------------------------- +;; Int Iterators. +;; ------------------------------------------------------------------- +(define_int_iterator PERMUTED [ARC64_UNSPEC_DUNPKL + ARC64_UNSPEC_DUNPKM + ARC64_UNSPEC_DPACKL + ARC64_UNSPEC_DPACKM + ARC64_UNSPEC_DBFLYL + ARC64_UNSPEC_DBFLYM]) +(define_int_iterator PERMUTES [ARC64_UNSPEC_SUNPKL + ARC64_UNSPEC_SUNPKM + ARC64_UNSPEC_SPACKL + ARC64_UNSPEC_SPACKM + ARC64_UNSPEC_SBFLYL + ARC64_UNSPEC_SBFLYM]) +(define_int_iterator PERMUTEH [ARC64_UNSPEC_HUNPKL + ARC64_UNSPEC_HUNPKM + ARC64_UNSPEC_HPACKL + ARC64_UNSPEC_HPACKM + ARC64_UNSPEC_HBFLYL + ARC64_UNSPEC_HBFLYM]) + +;; ------------------------------------------------------------------- +;; Int Iterators Attributes. 
+;; ------------------------------------------------------------------- +(define_int_attr perm_pat [(ARC64_UNSPEC_HUNPKL "unpkl") + (ARC64_UNSPEC_SUNPKL "unpkl") + (ARC64_UNSPEC_DUNPKL "unpkl") + (ARC64_UNSPEC_HUNPKM "unpkm") + (ARC64_UNSPEC_SUNPKM "unpkm") + (ARC64_UNSPEC_DUNPKM "unpkm") + (ARC64_UNSPEC_HPACKL "packl") + (ARC64_UNSPEC_SPACKL "packl") + (ARC64_UNSPEC_DPACKL "packl") + (ARC64_UNSPEC_HPACKM "packm") + (ARC64_UNSPEC_SPACKM "packm") + (ARC64_UNSPEC_DPACKM "packm") + (ARC64_UNSPEC_HBFLYL "bflyl") + (ARC64_UNSPEC_SBFLYL "bflyl") + (ARC64_UNSPEC_DBFLYL "bflyl") + (ARC64_UNSPEC_HBFLYM "bflym") + (ARC64_UNSPEC_SBFLYM "bflym") + (ARC64_UNSPEC_DBFLYM "bflym")]) + +;; ------------------------------------------------------------------- +;; Instruction types and attributes +;; ------------------------------------------------------------------- + +;; What is the insn_cost for this insn? The target hook can still +;; override this. For optimizing for size the "length" attribute is +;; used instead. +(define_attr "cost" "" (const_int 0)) + +(define_attr "type" "abs, adc, adcl, add, addhl, addl, and, andl, asl, +asll, asr, asrl, atldlop, atldop, bbit, bclr, bi, bic, bl, block, +bmsk, branch, branchcc, brcc, brk, bset, bsetl, btst, bxor, bxorl, +cmp, dbnz, div, divl, dmb, dmpywh, ex, ext, fadd, fcmp, fd2s, fdiv, +ffs, fh2s, flag, fls, fmadd, fmax, fmin, fmov, fmsub, fmul, fnmadd, +fnmsub, fp2int, fp2uint, frnd, fs2d, fs2h, fsgnj, fsgnjn, fsgnjx, +fsqrt, fsub, int2fp, jl, jump, ld, llock, lr, lsr, lsrl, mac, max, +maxl, min, minl, mod, modl, move, movecc, mpy, mpyl, neg, nop, norm, +normh, norml, not, notl, or, orl, qmach, qmpyh, return, rol, ror, +rtie, sbc, sbcl, scond, setcc, sex, sr, st, sub, subl, swap, swape, +swapel, swapl, sync, trap, tst, udiv, udivl, uint2fp, umod, umodl, +unknown, vadd, vaddsub, vfadd, vfaddsub, vfbflyl, vfbflym, vfdiv, +vfexch, vfext, vfins, vfmul, vfpackl, vfpackm, vfrep, vfsub, vfsubadd, +vfunpkl, vfunpkm, vmac2h, vmpy2h, vpack, vsub, vsubadd, xbfu, xor, +xorl" + (const_string "unknown")) + +(define_attr "iscompact" "yes,no,maybe" (const_string "no")) + +(define_attr "predicable" "yes,no" (const_string "no")) + +(define_attr "length" "" + (cond + [(eq_attr "iscompact" "yes") + (const_int 2) + + (eq_attr "type" "ld") + (if_then_else + (match_operand 1 "limm_ldst_operand" "") + (const_int 8) (const_int 4)) + + (eq_attr "type" "st") + (if_then_else + (ior (match_operand 0 "limm_ldst_operand" "") + (and (not (match_operand 1 "S06S0_immediate_operand" "")) + (match_operand 1 "immediate_operand" ""))) + (const_int 8) (const_int 4)) + + (eq_attr "type" "bl") + (if_then_else + (ior (match_operand 0 "plt34_symbol_p" "") + (match_operand 1 "plt34_symbol_p" "")) + (const_int 6) (const_int 4)) + + (eq_attr "iscompact" "maybe") + (cond + [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") + (const_int 4) + + (eq_attr "type" "and") + (const_int 2) + + (eq_attr "type" "or") + (const_int 2) + + (match_operand:DI 0 "" "") + (const_int 4) + ] + (const_int 2)) + ] + (const_int 8))) + +;; Select various CPU features. 
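+;; For example, an instruction alternative tagged with cpu_facility "cd"
+;; is only enabled when TARGET_CODE_DENSITY is set, while an "ncd"
+;; alternative is disabled in that case; see the "enabled" attribute
+;; below.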
+(define_attr "cpu_facility" "std,cd,ncd" + (const_string "std")) + +(define_attr "enabled" "no,yes" + (cond [(and (eq_attr "cpu_facility" "cd") + (not (match_test ("TARGET_CODE_DENSITY")))) + (const_string "no") + (and (eq_attr "cpu_facility" "ncd") + (match_test ("TARGET_CODE_DENSITY"))) + (const_string "no") + ] + (const_string "yes"))) + +;; ------------------------------------------------------------------- +;; Delay slots +;; ------------------------------------------------------------------- + +;; Define what can go in a delay slot, generic. +(define_attr "slottable" "false,true" + (cond + [(eq_attr "type" "jump,branch,jl,bl,bi,branchcc,dbnz,return,bbit,brcc") + (const_string "false") + + (eq_attr "length" "2,4") + (const_string "true") + ] + (const_string "false"))) + +;; Define what can go in a call delay slot. +(define_attr "call_slottable" "false,true" + (cond + [(eq_attr "slottable" "false") + (const_string "false") + + (match_test "regno_clobbered_p (BLINK_REGNUM, insn, Pmode, 1)") + (const_string "false") + ] + (const_string "true"))) + +;; Calls delay slots +(define_delay (and (eq_attr "type" "jl,bl,return") + (eq_attr "length" "2,4,8")) + [(eq_attr "call_slottable" "true") (nil) (nil)]) + +;; Jumps delay slots +(define_delay (ior (eq_attr "type" "jump,branch,branchcc,dbnz,bbit") +;; Accordingly to PRM jumps with LIMM and delay slots are illegal. + (and (eq_attr "type" "brcc") + (eq_attr "length" "4,12"))) + [(eq_attr "slottable" "true") (nil) (nil)]) + +;; Is there an instruction that we are actually putting into the delay +;; slot? N.B. Until after delay slot filler consider full insn size. +;; This is required for computing a correct loop body size. +(define_attr "delay_slot_filled" "no,yes" + (cond [(match_test "!crtl->dbr_scheduled_p") + (const_string "yes") + (match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no") + (match_test "JUMP_P (insn) + && INSN_ANNULLED_BRANCH_P (insn) + && !INSN_FROM_TARGET_P (NEXT_INSN (insn))") + (const_string "no")] + (const_string "yes"))) + +(define_attr "delay_slot_length" "" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_int 0)] + (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn))) + - get_attr_length (insn)"))) + +;; ------------------------------------------------------------------- +;; Pipeline descriptions and scheduling +;; ------------------------------------------------------------------- + +(include "hs6x.md") + +;; ------------------------------------------------------------------- +;; Moves +;; ------------------------------------------------------------------- + +(define_expand "mov" + [(set (match_operand:MV_ALLI 0 "nonimmediate_operand") + (match_operand:MV_ALLI 1 "general_operand"))] + "" + " + if (arc64_prepare_move_operands (operands[0], operands[1], mode)) + DONE; + " + ) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] + "TARGET_WIDE_LDST" + { + if (CONSTANT_P (operands[1])) + { + emit_move_insn (gen_lowpart (DImode, operands[0]), + gen_lowpart (DImode, operands[1])); + emit_move_insn (gen_highpart (DImode, operands[0]), + gen_highpart_mode (DImode, TImode, operands[1])); + DONE; + } + else if (!register_operand (operands[0], TImode) + && !register_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); + arc64_prepare_move_operands (operands[0], operands[1], TImode); + DONE; + + }) + +;; We use movsf for soft and hard floats. 
+(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand"))] + "" + { + if (arc64_prepare_move_operands (operands[0], operands[1], SFmode)) + DONE; + }) + +(define_expand "movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "") + (match_operand:HF 1 "general_operand"))] + "ARC64_HAS_FPUH" + { + if (arc64_prepare_move_operands (operands[0], operands[1], HFmode)) + DONE; + }) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand"))] + "ARC64_HAS_FPUD" + { + if (arc64_prepare_move_operands (operands[0], operands[1], DFmode)) + DONE; + }) + +;; mov<.f> b, c +;; mov<.f> b, s12 +;; mov_s b, u8 +;; mov_s g, h +;; mov_s h, s3 +;; +;; ld a, [b, s9] +;; ld a, [b, c] +;; ld a, [limm ] +;; +;; ldb_s a, [b, c] +;; ldb_s c, [b, u5] +;; +;; st c , [b , s9] +;; st limm, [b , s9] +;; stb_s b , [sp, u7] +;; stb_s c , [b , u5] +(define_insn "*arc64_movqi" + [(set + (match_operand:QI 0 "arc64_dest_operand" "=qh, q, r, q,Ustms,Ustor,Ucnst, r,Ustor") + (match_operand:QI 1 "general_operand" " qhS03MV,U08S0,ri,Uldms, q,S06S0, i, m, r")) + ] + ; in general, at least one of the operands must be a register + "register_operand (operands[0], QImode) + || register_operand (operands[1], QImode) + /* this is to match 'stb w6, [limm]' (S06S0 is the w6). */ + || (satisfies_constraint_S06S0 (operands[1]) + && memory_operand (operands[0], QImode)) + /* writing a byte into memory using limm variant. */ + || (immediate_operand (operands[1], QImode) + && memory_operand (operands[0], QImode))" + "@ + mov_s\\t%0,%1 + mov_s\\t%0,%1 + mov\\t%0,%1 + ldb_s\\t%0,%1 + stb_s\\t%1,%0 + stb%U0\\t%1,%0 + stb%U0\\t%1,%0 + ldb%U1\\t%0,%1 + stb%U0\\t%1,%0" + [(set_attr "type" "move,move,move,ld,st,st,st,ld,st") + (set_attr "length" "2,2,4,2,2,*,8,*,*")] +) + +(define_insn "*arc64_movhi" + [(set + (match_operand:HI 0 "arc64_dest_operand" "=qh,r, q, r,h,r, q,Ustms,Ustw6,Ucnst, r,Ustor") + (match_operand:HI 1 "general_operand" "qhS03MV,r,U08S0,S12S0,i,i,Uldms, q,S06S0, i, m, r")) + ] + "register_operand (operands[0], HImode) + || register_operand (operands[1], HImode) + || (satisfies_constraint_S06S0 (operands[1]) + && memory_operand (operands[0], HImode)) + || (CONST_INT_P (operands[1]) + && satisfies_constraint_Ucnst (operands[0]))" + "@ + mov_s\\t%0,%1 + mov\\t%0,%1 + mov_s\\t%0,%1 + mov\\t%0,%1 + mov_s\\t%0,%1 + mov\\t%0,%1 + ldh_s\\t%0,%1 + sth_s\\t%1,%0 + sth%U0\\t%1,%0 + sth%U0\\t%1,%0 + ldh%U1\\t%0,%1 + sth%U0\\t%1,%0" + [(set_attr "type" "move,move,move,move,move,move,ld,st,st,st,ld,st") + (set_attr "length" "2,4,2,4,6,8,2,2,*,8,*,*")] +) + +(define_insn "*arc64_movsi" + [(set + (match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst, r,Ustor") + (match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, m, r")) + ] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode) + || (satisfies_constraint_S06S0 (operands[1]) + && memory_operand (operands[0], SImode)) + || (CONST_INT_P (operands[1]) + && satisfies_constraint_Ucnst (operands[0]))" + "@ + mov_s\\t%0,%1 + mov\\t%0,%1 + mov_s\\t%0,%1 + mov\\t%0,%1 + add\\t%0,pcl,%1 + mov_s\\t%0,%1 + mov\\t%0,%1 + ld_s\\t%0,%1 + st_s\\t%1,%0 + st%U0\\t%1,%0 + st%U0\\t%1,%0 + ld%U1\\t%0,%1 + st%U0\\t%1,%0" + [(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,st") + (set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,*,*")] +) + +(define_insn "*mov_cmp0" + [(set (reg:CC_ZN 
CC_REGNUM) + (compare:CC_ZN (match_operand:ALLI 1 "nonmemory_operand" "S12S0r,S32S0") + (const_int 0))) + (set (match_operand:ALLI 0 "register_operand" "=r,r") (match_dup 1))] + "" + "mov.f\\t%0,%1" + [(set_attr "type" "move") + (set_attr "length" "4,8")]) + +;; Softcore float move. +(define_insn "*movsf_softfp" + [(set (match_operand:SF 0 "arc64_dest_operand" "=qh,r,qh,r, q,Ustms,r,Ustor") + (match_operand:SF 1 "general_operand" "qhZ,r, E,E,Uldms, q,m,r")) + ] + "!ARC64_HAS_FP_BASE + && (register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode))" + "@ + mov_s\\t%0,%1 + mov\\t%0,%1 + mov_s\\t%0,%1 + mov\\t%0,%1 + ld_s\\t%0,%1 + st_s\\t%1,%0 + ld%U1\\t%0,%1 + st%U0\\t%1,%0" + [(set_attr "type" "move,move,move,move,ld,st,ld,st") + (set_attr "length" "2,4,6,8,2,2,*,*")]) + +;; For a fp move I use FSMOV. instruction. However, we can also +;; use FSSGNJ. +;; FIXME! add short instruction selection +(define_insn "*mov_hardfp" + [(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor") + (match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))] + "ARC64_HAS_FP_BASE + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + "@ + fmov\\t%0,%1 + fld%U1\\t%0,%1 + fst%U0\\t%1,%0 + fmv2\\t%0,%1 + fmv2\\t%0,%1 + mov\\t%0,%1 + mov\\t%0,%1 + ld%U1\\t%0,%1 + st%U0\\t%1,%0" + [(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st") + (set_attr "length" "4,*,*,4,4,4,8,*,*")]) + +;; move 128bit +(define_insn_and_split "*arc64_movti" + [(set (match_operand:TI 0 "arc64_dest_operand" "=r,r,Ustor") + (match_operand:TI 1 "nonimmediate_operand" "r,m,r"))] + "TARGET_WIDE_LDST + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode))" + "@ + # + lddl%U1\\t%0,%1 + stdl%U0\\t%1,%0" + "&& reload_completed + && arc64_split_double_move_p (operands, TImode)" + [(const_int 0)] + { + arc64_split_double_move (operands, TImode); + DONE; + } + [(set_attr "type" "move,ld,st") + (set_attr "length" "8,*,*")]) +;; +;; Short insns: movl_s g,h; movl_s b,u8 +;; Long insns: movl, stl, ldl +;; +(define_insn "*arc64_movdi" + [(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q,r, r, r, r,Ucnst, r,r,Ustk<,Ustor") + (match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,r,S12S0,S32S0SymMV,SyPic,S32S0,Ustk>,m, r, r"))] + "TARGET_64BIT + && (register_operand (operands[0], DImode) + || register_operand (operands[1], DImode) + || (CONST_INT_P (operands[1]) + && satisfies_constraint_Ucnst (operands[0])))" + "@ + movl_s\\t%0,%1 + movl_s\\t%0,%1 + movl\\t%0,%1 + movl\\t%0,%1 + movl\\t%0,%1 + addl\\t%0,pcl,%1 + stl%U0\\t%1,%0 + popl_s\\t%0 + ldl%U1\\t%0,%1 + pushl_s\\t%1 + stl%U0\\t%1,%0" + [(set_attr "type" "move,move,move,move,move,addl,st,ld,ld,st,st") + (set_attr "length" "2,2,4,4,8,8,8,2,*,2,*")] +) + +;; Hi/Low moves for constant and symbol loading. + +(define_insn "*movdi_high" + [(set (match_operand:DI 0 "register_operand" "= r, qh, r,r") + (high:DI + (match_operand:DI 1 "arc64_immediate_or_pic" "S12S0,SymIm,SymIm,SyPic")))] + "" + "@ + movhl\\t%0,%H1 + movhl_s\\t%0,%H1 + movhl\\t%0,%H1 + addhl\\t%0,pcl,%H1" + [(set_attr "type" "move") + (set_attr "length" "4,6,8,8")]) + +;; The immediates are already trimmed to fit the 32 bit limm field. 
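+;; A full 64-bit constant or symbolic address is therefore synthesised
+;; in two steps: a "high" move that sets the upper 32 bits (movhl, see
+;; *movdi_high and *movh_shift) followed by an OR of the lower 32 bits
+;; (*movdi_lo_sum_iori).  Illustrative sketch; the exact operand syntax
+;; is produced by the %H/%L output modifiers:
+;;
+;;   movhl  r0,@sym        ; upper half
+;;   orl    r0,r0,@sym     ; lower half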
+(define_insn "*movh_shift" + [(set (match_operand:DI 0 "register_operand" "= r, qh, r") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "rS12S0,S32S0,S32S0") + (const_int 32)))] + "" + "@ + movhl\\t%0,%1 + movhl_s\\t%0,%1 + movhl\\t%0,%1" + [(set_attr "type" "move") + (set_attr "length" "4,6,8")]) + +;; N.B. All immediates needs to be unsiged to endup at most in u32. +(define_insn "*movdi_lo_sum_iori" + [(set (match_operand:DI 0 "register_operand" "=q, r, h, r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "0, 0, 0, r") + (match_operand:DI 2 "immediate_operand" "q,U10S0,SymIm,SymIm")))] + "" + "@ + orl%?\\t%0,%1,%2 + orl%?\\t%0,%1,%L2 + orl%?\\t%0,%1,%L2 + orl%?\\t%0,%1,%L2" + [(set_attr "type" "or") + (set_attr "iscompact" "yes,no,yes,no") + (set_attr "length" "2,4,6,8")]) + +(define_insn "*adddi_high" + [(set (match_operand:DI 0 "register_operand" "= qh, r, r,r, r") + (plus:DI (match_operand:DI 1 "register_operand" " 0, 0, r,r, r") + (high:DI + (match_operand:DI 2 "nonmemory_operand" "S32S0,S12S0,U06S0,r,S32S0"))))] + "" + "@ + addhl_s\\t%0,%1,%2 + addhl\\t%0,%1,%2 + addhl\\t%0,%1,%2 + addhl\\t%0,%1,%2 + addhl\\t%0,%1,%2" + [(set_attr "type" "addhl") + (set_attr "iscompact" "yes,no,no,no,no") + (set_attr "length" "6,4,4,4,8")]) + +; conditional execution patterns +(define_insn "*mov_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (set (match_operand:ALLI 0 "register_operand" "= r,r") + (match_operand:ALLI 1 "nonmemory_operand" "rU06S0,S32S0")))] + "" + "mov.%m3\\t%0,%1" + [(set_attr "type" "move") + (set_attr "length" "4,8")]) + +(define_insn "*mov_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (set (match_operand:GPF_HF 0 "register_operand" "=w,*r,*r") + (match_operand:GPF_HF 1 "nonmemory_operand" "w,*r,*E")))] + "" + "@ + fmov.%m3\\t%0,%1 + mov.%m3\\t%0,%1 + mov.%m3\\t%0,%1" + [(set_attr "type" "fmov,move,move") + (set_attr "length" "4,4,8")]) + +;; 0 is dst +;; 1 is src +;; 2 is size of copy in bytes +;; 3 is alignment + +(define_expand "cpymem" + [(match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand") + (match_operand:P 2 "immediate_operand") + (match_operand:P 3 "immediate_operand")] + "!STRICT_ALIGNMENT" +{ + if (arc64_expand_cpymem (operands)) + DONE; + FAIL; +} +) + +;; ------------------------------------------------------------------- +;; Subroutine calls and sibcalls +;; ------------------------------------------------------------------- + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (use (match_operand 2 "" "")) + (clobber (reg BLINK_REGNUM))])] + "" + { + arc64_expand_call (NULL_RTX, operands[0], false); + DONE; + } +) + +(define_insn "*call_insn" + [(call (mem:P (match_operand:P 0 "arc64_call_insn_operand" "q,r,BLsym,S12S0,S32S0")) + (match_operand 1 "" "")) + (clobber (reg:P BLINK_REGNUM))] + "" + "@ + jl_s%*\\t[%0] + jl%*\\t[%0] + bl%P0%*\\t%C0 + jl%*\\t%0 + jl%*\\t%0" + [(set_attr "type" "jl,jl,bl,jl,jl") + (set_attr "length" "2,4,*,4,8")]) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (use (match_operand 3 "" "")) + (clobber (reg BLINK_REGNUM))])] + "" + " + { + arc64_expand_call (operands[0], operands[1], false); + DONE; + }" +) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "" "") + (call 
(mem:P (match_operand:P 1 "arc64_call_insn_operand" + "q,r,BLsym,S12S0,S32S0")) + (match_operand 2 "" ""))) + (clobber (reg:P BLINK_REGNUM))] + "" + "@ + jl_s%*\\t[%1] + jl%*\\t[%1] + bl%P1%*\\t%C1 + jl%*\\t%1 + jl%*\\t%1" + [(set_attr "type" "jl,jl,bl,jl,jl") + (set_attr "length" "2,4,*,4,8")]) + +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (return) + (use (match_operand 2 "" ""))])] + "" + { + arc64_expand_call (NULL_RTX, operands[0], true); + DONE; + } + ) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (return) + (use (match_operand 3 "" ""))])] + "" + { + arc64_expand_call (operands[0], operands[1], true); + DONE; + } +) + +;FIXME! add short variant for jump +(define_insn "*sibcall_insn" + [(call + (mem:P + (match_operand:P 0 "arc64_call_insn_operand" "Sbreg,BLsym,S12S0,S32S0")) + (match_operand 1 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "@ + j%*\\t[%0] + b%*\\t%C0 + j%*\\t%0 + j%*\\t%0" + [(set_attr "type" "jump,branch,jump,jump") + (set_attr "length" "4,4,4,8")] +) + +;FIXME! add short variant for jump +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") + (call + (mem:P + (match_operand:P 1 "arc64_call_insn_operand" "Sbreg,BLsym,S12S0,S32S0")) + (match_operand 2 "" ""))) + (return)] + "SIBLING_CALL_P (insn)" + "@ + j%*\\t[%1] + b%*\\t%C1 + j%*\\t%1 + j%*\\t%1" + [(set_attr "type" "jump,branch,jump,jump") + (set_attr "length" "4,4,4,8")] +) + +; conditional execution patterns +(define_insn "*call_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (parallel + [(call (mem:P + (match_operand:P 0 "arc64_call_insn_operand" "r,BLsym,U06S0")) + (match_operand 1 "" "")) + (clobber (reg:P BLINK_REGNUM))]))] + "(arc64_cmodel_var == ARC64_CMODEL_SMALL) + || register_operand (operands[0], Pmode)" + "@ + jl%m3%*\\t[%0] + bl%m3%*\\t%C0 + jl%m3%*\\t%0" + [(set_attr "type" "jl,bl,jl") + (set_attr "length" "4")]) + +(define_insn "*callv_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (parallel + [(set (match_operand 0 "" "") + (call (mem:P (match_operand:P 1 "arc64_call_insn_operand" + "r,BLsym,U06S0")) + (match_operand 2 "" ""))) + (clobber (reg:P BLINK_REGNUM))]))] + "(arc64_cmodel_var == ARC64_CMODEL_SMALL) + || register_operand (operands[1], Pmode)" + "@ + jl%m3%*\\t[%1] + bl%m3%*\\t%C1 + jl%m3%*\\t%1" + [(set_attr "type" "jl,bl,jl") + (set_attr "length" "4")]) + +(define_insn "*sibcall_insn_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (parallel + [(call (mem:P + (match_operand:P 0 "arc64_call_insn_operand" "Sbreg,BLsym,U06S0")) + (match_operand 1 "" "")) + (return)]))] + "SIBLING_CALL_P (insn) + && ((arc64_cmodel_var == ARC64_CMODEL_SMALL) + || register_operand (operands[0], Pmode))" + "@ + j%m3%*\\t[%0] + b%m3%*\\t%C0 + j%m3%*\\t%0" + [(set_attr "type" "jump,branch,jump") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_insn_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (parallel + [(set (match_operand 0 "" "") + (call + (mem:P + (match_operand:P 1 "arc64_call_insn_operand" "Sbreg,BLsym,U06S0")) + (match_operand 2 "" ""))) + (return)]))] + "SIBLING_CALL_P (insn) + && 
((arc64_cmodel_var == ARC64_CMODEL_SMALL) + || register_operand (operands[1], Pmode))" + "@ + j%m3%*\\t[%1] + b%m3%*\\t%C1 + j%m3%*\\t%1" + [(set_attr "type" "jump,branch,jump") + (set_attr "length" "4")]) + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "") + (const_int 0)) + (match_operand 1 "") + (match_operand 2 "")])] + "" +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + emit_insn (gen_blockage ()); + DONE; +}) + +;; ------------------------------------------------------------------- +;; Jumps and other miscellaneous insns +;; ------------------------------------------------------------------- + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "register_operand"))] + "" +{ + operands[0] = force_reg (Pmode, operands[0]); + if (Pmode == SImode) + emit_jump_insn (gen_indirect_jumpsi (operands[0])); + else + emit_jump_insn (gen_indirect_jumpdi (operands[0])); + DONE; +}) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:P 0 "register_operand" "q,r"))] + "" + "j%?%*\\t[%0]" + [(set_attr "type" "jump") + (set_attr "length" "2,4")] +) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "b%?%*\\t%l0" + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 0) (pc)) (const_int -512)) + (le (minus (match_dup 0) (pc)) (const_int 506)) + (match_test "!CROSSING_JUMP_P (insn)") + (eq_attr "delay_slot_filled" "no")) + (const_int 2) + (const_int 4)))] +) + +(define_expand "cbranch4" + [(set (pc) (if_then_else + (match_operator 0 "arc64_comparison_operator" + [(match_operand:GPI 1 "nonmemory_operand") + (match_operand:GPI 2 "nonmemory_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " + operands[1] = arc64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + " + ) + +(define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "arc64_comparison_operator" + [(match_operand:GPF_HF 1 "register_operand") + (match_operand:GPF_HF 2 "register_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "ARC64_HAS_FP_BASE" + " + operands[1] = arc64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + " +) + +(define_expand "cbranchcc4" + [(set (pc) (if_then_else + (match_operator 0 "arc64_comparison_operator" + [(match_operand 1 "cc_register") + (match_operand 2 "const0_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +(define_insn "condjump" + [(set (pc) (if_then_else + (match_operator 0 "arc64_comparison_operator" + [(match_operand 1 "cc_register" "") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "b%m0%?%*\\t%l2" + [(set_attr "type" "branchcc") + (set (attr "length") + (cond + [(eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (and (match_operand 0 "equality_comparison_operator" "") + (and (ge (minus (match_dup 2) (pc)) (const_int -512)) + (le (minus (match_dup 2) (pc)) (const_int 506)))) + (const_int 2) + + (and (match_operand 0 "ccmode_comparison_operator" "") + (and (ge (minus (match_dup 2) (pc)) (const_int -60)) + (le (minus (match_dup 2) (pc)) (const_int 58)))) + (const_int 2)] + (const_int 4)))]) + +(define_expand "prologue" + [(clobber (const_int 0))] + "" + " + arc64_expand_prologue (); + DONE; + " +) + +(define_expand 
"epilogue" + [(clobber (const_int 0))] + "" + " + arc64_expand_epilogue (false); + DONE; + " +) + +(define_expand "sibcall_epilogue" + [(clobber (const_int 0))] + "" + " + arc64_expand_epilogue (true); + DONE; + " +) + +(define_expand "return" + [(simple_return)] + "arc64_can_use_return_insn_p ()" + "") + +(define_insn "simple_return" + [(simple_return)] + "" + { + return arc64_output_return (); + } + [(set_attr "type" "return") + (set_attr "length" "2")]) + +(define_insn "trap_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "U06S0")] + ARC64_VUNSPEC_TRAP_S)] + "" + "trap_s\\t%0" + [(set_attr "length" "2") + (set_attr "type" "trap")]) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "trap_s\\t5" + [(set_attr "length" "2") + (set_attr "type" "trap")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop_s" + [(set_attr "type" "nop") + (set_attr "length" "2")]) + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] ARC64_VUNSPEC_BLOCKAGE)] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "block")] + ) + +(define_insn "rtie" + [(return) + (unspec_volatile [(const_int 0)] ARC64_VUNSPEC_RTIE)] + "" + "rtie" + [(set_attr "length" "4") + (set_attr "type" "rtie")] + ) + +;; Don't need initialization instructions. +(define_expand "doloop_begin" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" ""))] ; doloop_end pattern + "" + { + FAIL; + } +) + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" ""))] ; doloop_end pattern + "" + { + machine_mode mode = GET_MODE (operands[0]); + if (mode != Pmode) + FAIL; + + operands[0] = force_reg (Pmode, operands[0]); + + if (mode == SImode) + emit_jump_insn (gen_dbnzsi (operands[0], operands[1])); + else + emit_jump_insn (gen_dbnzdi (operands[0], operands[1])); + DONE; + }) + +(define_insn_and_split "dbnz" + [(set (pc) + (if_then_else + (ne (match_operand:P 0 "arc64_dest_operand" "+r,!Ustor") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:P (match_dup 0) + (const_int -1))) + (clobber (match_scratch:P 2 "=X,r"))] + "" + "* +{ + switch (which_alternative) + { + default: + return \"#\"; + + case 0: + switch (get_attr_length (insn)) + { + case 4: + /* This is the normal case. */ + return \"dbnz%*\\t%0,%l1\"; + + case 8: + /* The dbnz is too short, use sub.f/bne instructions. 
*/ + return \"sub.f\\t%0,%0,1\\n\\tbne%*\\t%l1\"; + + default: + gcc_unreachable (); + } + break; + } +}" + "reload_completed && memory_operand (operands[0], Pmode)" + [(set (match_dup 2) (match_dup 0)) + (parallel + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN (plus:P (match_dup 2) (const_int -1)) + (const_int 0))) + (set (match_dup 2) (plus:P (match_dup 2) (const_int -1)))]) + (set (match_dup 0) (match_dup 2)) + (set (pc) (if_then_else (ne (reg:CC_ZN CC_REGNUM) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "" + [(set_attr "type" "dbnz") + (set (attr "length") + (cond [(eq_attr "alternative" "1") + (const_int 20) + (and (eq_attr "alternative" "0") + (ge (minus (match_dup 1) (pc)) (const_int -4092)) + (le (minus (match_dup 1) (pc)) + (minus (const_int 4094) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4)] + (const_int 8)))]) + +; conditional execution +(define_insn "*returnt_ce" + [(set (pc) + (if_then_else (match_operator 0 "arc64_comparison_operator" + [(reg CC_REGNUM) (const_int 0)]) + (simple_return) (pc)))] + "" + "j%m0%*\\t[blink]" + [(set_attr "type" "return") + (set_attr "length" "4")]) + +; Jump tables +(define_expand "casesi" + [(match_operand:SI 0 "register_operand" "") ; Index + (match_operand:SI 1 "const_int_operand" "") ; Lower bound + (match_operand:SI 2 "const_int_operand" "") ; Total range + (match_operand 3 "" "") ; Table label + (match_operand 4 "" "")] ; Out of range label + "" + { + arc64_expand_casesi (operands); + DONE; + }) + +(define_insn "casesi_dispatch" + [(set (pc) + (unspec:DI [(match_operand:SI 0 "register_operand" "r,q,r") + (label_ref (match_operand 1 "" "")) + (const_int 0)] + ARC64_UNSPEC_CASESI))] + "" + "@ + bi\\t[%0] + j_s%*\\t[%0] + j%*\\t[%0]" + [(set_attr "type" "bi,jump,jump") + (set_attr "length" "4,2,4") + (set_attr "cpu_facility" "cd,ncd,ncd")]) + +(define_insn "casesi_addaddr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (label_ref (match_operand 2 "" "")) + (const_int 1)] + ARC64_UNSPEC_CASESI))] + "" + "add2\\t%0,%l2,%1" + [(set_attr "type" "add") + (set_attr "length" "8")]) + +(define_insn "casesi_addaddrdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:SI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r") + (const_int 2)] + ARC64_UNSPEC_CASESI))] + "" + "add2l\\t%0,%2,%1" + [(set_attr "type" "addl") + (set_attr "length" "4")]) + +(define_insn "casesi_dispatchdi" + [(set (pc) (match_operand:DI 0 "register_operand" "q,r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "j%?%*\\t[%0]" + [(set_attr "type" "jump") + (set_attr "length" "2,4")]) + +;; combiner patterns used to match bbit0/1 instructions. +;; Unfortunately, I cannot use splitting for this pattern as the +;; insn length is know very late during compilation process. +(define_insn "*bbit_and" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(and:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "bbitimm_operand" "")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REGNUM))] + "!CROSSING_JUMP_P (insn) && (TARGET_BBIT || reload_completed)" + { + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + switch (get_attr_length (insn)) + { + case 4: + return (GET_CODE (operands[3]) == EQ + ? 
\"bbit0%*\\t%1,%2,%l0\" : \"bbit1%*\\t%1,%2,%l0\"); + default: + return \"btst\\t%1,%2\\n\\tb%m3%*\\t%l0\"; + } + } + [(set_attr "type" "bbit") + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) + (minus (const_int 248) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (const_int 8)))]) + +;; BBITx instructions need to be generated as late as possible. +;; Hence, we need to postpone it untill 2nd peephole2 step. However, +;; this may need an upstream change. + +;;(define_peephole2 +;; [(set (match_operand 0 "cc_register") +;; (compare:CC_ZN (and:GPI (match_operand:GPI 1 "register_operand" "") +;; (match_operand 2 "bbitimm_operand" "")) +;; (const_int 0))) +;; (set (pc) (if_then_else +;; (match_operator 3 "equality_comparison_operator" +;; [(match_dup 0) (const_int 0)]) +;; (label_ref (match_operand 4 "" "")) +;; (pc)))] +;; "(peephole2_instance == 1) && peep2_reg_dead_p (2, operands[0])" +;; [(parallel +;; [(set (pc) +;; (if_then_else +;; (match_op_dup 3 [(and:GPI (match_dup 1) (match_dup 2)) +;; (const_int 0)]) +;; (label_ref (match_operand 4 "" "")) +;; (pc))) +;; (clobber (reg:CC_ZN CC_REGNUM))])]) + +(define_insn "*bbit_zext" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:GPI + (match_operand:GPI 1 "register_operand" "r") + (const_int 1) + (match_operand:GPI 2 "nonmemory_operand" "ir")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REGNUM))] + "!CROSSING_JUMP_P (insn) && (TARGET_BBIT || reload_completed)" + { + switch (get_attr_length (insn)) + { + case 4: + return (GET_CODE (operands[3]) == EQ + ? \"bbit0%*\\t%1,%2,%l0\" : \"bbit1%*\\t%1,%2,%l0\"); + default: + return \"btst\\t%1,%2\\n\\tb%m3%*\\t%l0\"; + } + } + [(set_attr "type" "bbit") + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) + (minus (const_int 248) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (const_int 8)))]) + +;;(define_peephole2 +;; [(set (match_operand 0 "cc_register") +;; (compare:CC_ZN (zero_extract:GPI +;; (match_operand:GPI 1 "register_operand" "") +;; (const_int 1) +;; (match_operand:GPI 2 "nonmemory_operand" "")) +;; (const_int 0))) +;; (set (pc) (if_then_else +;; (match_operator 3 "equality_comparison_operator" +;; [(match_dup 0) (const_int 0)]) +;; (label_ref (match_operand 4 "" "")) +;; (pc)))] +;; "(peephole2_instance == 1) && peep2_reg_dead_p (2, operands[0])" +;; [(parallel +;; [(set (pc) +;; (if_then_else +;; (match_op_dup 3 [(zero_extract:GPI +;; (match_dup 1) (const_int 1) (match_dup 2)) +;; (const_int 0)]) +;; (label_ref (match_operand 4 "" "")) +;; (pc))) +;; (clobber (reg:CC_ZN CC_REGNUM))])]) + +;; combiner/instruction pattern for BRcc instructions. We consider +;; all BRcc supported comparisons but compare with zero. The positive +;; range needs to take into account the limm size, and the pcl +;; rounding. This pattern is under an option as it may prohibit +;; further optimizations like if-conversion. 
+(define_insn "*brcc" + [(set (pc) + (if_then_else + (match_operator 3 "brcc_comparison_operator" + [(match_operand:GPI 1 "register_operand" "q, r,r") + (match_operand:GPI 2 "nonmemory_operand" "U0000,U06S0r,S32S0")]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "!CROSSING_JUMP_P (insn) && (TARGET_BRCC || reload_completed)" + { + switch (get_attr_length (insn)) + { + case 2: + return \"br%m3_s\\t%1,%2,%l0\"; + case 4: + case 8: + return \"br%m3%*\\t%1,%2,%l0\"; + default: + return \"cmp\\t%1,%2\\n\\tb%m3%*\\t%l0\"; + } + } + [(set_attr "type" "brcc") + (set (attr "length") + (cond [(and (match_operand 3 "equality_comparison_operator" "") + (ge (minus (match_dup 0) (pc)) (const_int -126)) + (le (minus (match_dup 0) (pc)) (const_int 122)) + (eq (symbol_ref "which_alternative") (const_int 0)) + ;; no delay slot for short version. + (eq_attr "delay_slot_filled" "no") + (ior (and (match_operand:DI 1 "" "") + (match_test "TARGET_64BIT")) + (and (match_operand:SI 1 "" "") + (match_test "!TARGET_64BIT")))) + (const_int 2) + (and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) (const_int 244)) + (ior (eq (symbol_ref "which_alternative") (const_int 0)) + (eq (symbol_ref "which_alternative") (const_int 1)))) + (const_int 4) + (and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) (const_int 244)) + (eq_attr "delay_slot_filled" "no") + (eq (symbol_ref "which_alternative") (const_int 2))) + (const_int 8) + ;; This should be variable as well... + (eq (symbol_ref "which_alternative") (const_int 1)) + (const_int 12)] + (const_int 12))) + ]) + +;; BRcc is not complete, emulate missing variants: +;; brgt rb,rc,label => brlt rc,rb,label +;; brgt rb,u6,label => brge rb,u6+1,label +;; brhi rb,rc,label => brlo rc,rb,label +;; brhi rb,u6,label => brhs rb,u6+1,label +;; brle rb,rc,label => brge rc,rb,label +;; brle rb,u6,label => brlt rb,u6+1,label +;; brls rb,rc,label => brhs rc,rb,label +;; brls rb,u6,label => brlo rb,u6+1,label +(define_insn "*emu_brcc" + [(set (pc) + (if_then_else + (match_operator 3 "ebrcc_comparison_operator" + [(match_operand:GPI 1 "register_operand" "r,r,r") + (match_operand:GPI 2 "arc64_nonmem_operand" "U06M1,r,n")]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "!CROSSING_JUMP_P (insn) && reload_completed" + { + switch (get_attr_length (insn)) + { + case 4: + case 8: + if (which_alternative == 0) + { + return \"br%w3%*\\t%1,%2 + 1,%l0\"; + } + return \"br%W3%*\\t%2,%1,%l0\"; + default: + return \"cmp\\t%1,%2\\n\\tb%m3%*\\t%l0\"; + } + } + [(set_attr "type" "brcc") + (set (attr "length") + (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) (const_int 244)) + (ior (eq (symbol_ref "which_alternative") (const_int 0)) + (eq (symbol_ref "which_alternative") (const_int 1)))) + (const_int 4) + (and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) (const_int 244)) + (eq_attr "delay_slot_filled" "no") + (eq (symbol_ref "which_alternative") (const_int 2))) + (const_int 8)] + (const_int 12))) + ]) + +;; Peephole pattern for matching BRcc instructions. 
+(define_peephole2 + [(set (match_operand 0 "cc_register") + (compare:CC (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "nonmemory_operand"))) + (set (pc) (if_then_else + (match_operator 3 "arc64_comparison_operator" + [(match_dup 0) (const_int 0)]) + (label_ref (match_operand 4 "")) + (pc)))] + "peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (pc) + (if_then_else + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (label_ref (match_dup 4)) + (pc))) + (clobber (reg:CC CC_REGNUM))])]) + +;; Similar like the one above. +(define_peephole2 + [(set (match_operand 0 "cc_register") + (compare:CC_ZN (match_operand:GPI 1 "register_operand") + (const_int 0))) + (set (pc) (if_then_else + (match_operator 2 "brcc_comparison_operator" + [(match_dup 0) (const_int 0)]) + (label_ref (match_operand 3 "")) + (pc)))] + "peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (pc) + (if_then_else + (match_op_dup 2 [(match_dup 1) (const_int 0)]) + (label_ref (match_dup 3)) + (pc))) + (clobber (reg:CC CC_REGNUM))])]) + +;; ------------------------------------------------------------------- +;; Sign/Zero extension +;; ------------------------------------------------------------------- + +(define_expand "sidi2" + [(set (match_operand:DI 0 "register_operand") + (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))] + "TARGET_64BIT" +) + +(define_expand "2" + [(set (match_operand:GPI 0 "register_operand") + (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))] + "" +) + +;; TODO: Commented out this to fix issues in dejagnu. +;; NEEDS TO BE VERIFIED LATER ON. +;; (define_expand "qihi2" +;; [(set (match_operand:HI 0 "register_operand") +;; (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))] +;; "" +;; ) + +(define_insn "*zero_extendsi2" + [(set (match_operand:SI 0 "register_operand" "=q,r, q,r") + (zero_extend:SI + (match_operand:SHORT 1 "nonimmediate_operand" "q,r,Uldms,m")))] + "" + "@ + ext_s\\t%0,%1 + ext\\t%0,%1 + ld_s\\t%0,%1 + ld%U1\\t%0,%1" + [(set_attr "type" "sex,sex,ld,ld") + (set_attr "length" "2,4,2,*")]) + +(define_insn "*zero_extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r, q,r") + (zero_extend:DI + (match_operand:EXT 1 "nonimmediate_operand" "r,Uldms,m")))] + "TARGET_64BIT" + "@ + bmskl\\t%0,%1, + ld_s\\t%0,%1 + ld%U1\\t%0,%1" + [(set_attr "type" "and,ld,ld") + (set_attr "length" "4,2,*")] +) + +(define_insn "*sign_extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (match_operand:EXT 1 "nonimmediate_operand" "r,m")))] + "((!TARGET_VOLATILE_DI) || (!MEM_VOLATILE_P (operands[1]))) + && TARGET_64BIT" + "@ + sexl\\t%0,%1 + ld.x%U1\\t%0,%1" + [(set_attr "type" "sex,ld") + (set_attr "length" "4,*")]) + +(define_insn "*sign_extendsi2" + [(set (match_operand:SI 0 "register_operand" "=q,r,r") + (sign_extend:SI + (match_operand:SHORT 1 "nonimmediate_operand" "q,r,m")))] + "" + "@ + sex_s\\t%0,%1 + sex\\t%0,%1 + ld.x%U1\\t%0,%1" + [(set_attr "type" "sex,sex,ld") + (set_attr "length" "2,4,8")]) + +;; ------------------------------------------------------------------- +;; Simple arithmetic +;; ------------------------------------------------------------------- + +;; TODO: Allow symbols in LIMM field +(define_expand "si3" + [(set (match_operand:SI 0 "register_operand") + (ADDSUB:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand")))] + "" + { + if (!register_operand (operands[1], SImode) + && !register_operand (operands[2], SImode)) + { + if (!CONST_INT_P (operands[1])) + operands[1] = 
force_reg (SImode, operands[1]); + else + operands[2] = force_reg (SImode, operands[2]); + } + }) + +(define_expand "mul3" + [(set (match_operand:GPI 0 "register_operand") + (mult:GPI (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "nonmemory_operand")))] + "" + { + if (!register_operand (operands[2], mode) + && !satisfies_constraint_S32S0 (operands[2])) + operands[2] = force_reg (mode, operands[2]); + }) + +;; The overflow patterns are tested using expensive tests and dg-torture.exp +(define_expand "addv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" + { + emit_insn (gen_add3_Vcmp (operands[0], operands[1], operands[2])); + arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; + }) + +(define_insn "add3_Vcmp" + [(parallel + [(set + (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus: + (sign_extend: (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r")) + (sign_extend: (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0"))) + (sign_extend: (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "= r, r,r, r, r") + (plus:GPI (match_dup 1) (match_dup 2)))])] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "add.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "add")]) + +(define_expand "uaddv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" + { + emit_insn (gen_add3_Ccmp (operands[0], operands[1], operands[2])); + arc64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); + DONE; + }) + +(define_expand "subv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" + { + emit_insn (gen_sub3_Vcmp (operands[0], operands[1], operands[2])); + arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; + }) + +(define_insn "sub3_Vcmp" + [(set + (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend: + (minus:GPI + (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r") + (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0"))) + (minus: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "= r, r,r, r, r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "sub.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "sub")]) + +(define_expand "negv3" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (label_ref (match_operand 2 "" ""))] + "" + { + emit_insn (gen_neg2_Vcmp (operands[0], operands[1])); + arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); + DONE; + }) + +(define_insn "negsi2_Vcmp" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:DI + (neg:SI (match_operand:SI 1 "register_operand" "r"))) + (neg:DI (sign_extend:DI (match_dup 1))))) + (set (match_operand:SI 0 "register_operand" "=r") + (neg:SI (match_dup 1)))] + "" + "neg.f\\t%0,%1" + [(set_attr "type" "neg") + (set_attr "length" "4")]) + +(define_insn "negdi2_Vcmp" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:TI + (neg:DI (match_operand:DI 1 "register_operand" "r"))) + (neg:TI (sign_extend:TI (match_dup 
1))))) + (set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_dup 1)))] + "" + "rsubl.f\\t%0,%1,0" + [(set_attr "type" "neg") + (set_attr "length" "4")]) + +(define_expand "usubv4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" + { + emit_insn (gen_sub3_cmp (operands[0], operands[1], operands[2])); + arc64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + DONE; + }) + +(define_expand "mulvsi4" + [(ANY_EXTEND:DI (match_operand:SI 0 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand")) + (label_ref (match_operand 3 "" ""))] + "" + { + emit_insn (gen_mulsi3_Vcmp (operands[0], operands[1], operands[2])); + arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; + }) + +(define_insn "mulsi3_Vcmp" + [(parallel + [(set + (reg:CC_V CC_REGNUM) + (compare:CC_V + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0, r,r, r")) + (ANY_EXTEND:DI (match_operand:SI 2 "arc64_nonmem_operand" "S12S0,U06S0,r,S32S0"))) + (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2))))) + (set (match_operand:SI 0 "register_operand" "=r, r,r, r") + (mult:SI (match_dup 1) (match_dup 2)))])] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "mpy.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "mpy")]) + +;; ------------------------------------------------------------------- +;; Comparison insns +;; ------------------------------------------------------------------- + +(define_expand "cmp" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:GPI 0 "register_operand" "") + (match_operand:GPI 1 "nonmemory_operand" "")))] + "" + { + if (!register_operand (operands[1], DImode)) + operands[1] = force_reg (DImode, operands[1]); + }) + +(define_insn "*cmp" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 0 "nonmemory_operand" " q, qh,r, r, r,U06S0,S12S0,S32S0,r") + (match_operand:GPI 1 "nonmemory_operand" "qh,S03MV,r,U06S0,S12S0, r, r, r,S32S0")))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + cmp%?\\t%0,%1 + cmp%?\\t%0,%1 + cmp%?\\t%0,%1 + cmp%?\\t%0,%1 + cmp%?\\t%0,%1 + rcmp%?\\t%1,%0 + rcmp%?\\t%1,%0 + rcmp%?\\t%1,%0 + cmp%?\\t%0,%1" + [(set_attr "type" "cmp") + (set_attr "iscompact" "maybe,maybe,no,no,no,no,no,no,no") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,no,no") + (set_attr "length" "*,*,4,4,4,4,4,8,8")]) + + +(define_insn "*cmp_ce" + [(cond_exec + (match_operator 2 "arc64_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 0 "nonmemory_operand" "r, r,U06S0,S32S0,r") + (match_operand:GPI 1 "nonmemory_operand" "r,U06S0, r, r,S32S0"))))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + cmp.%m2\\t%0,%1 + cmp.%m2\\t%0,%1 + rcmp.%m2\\t%1,%0 + rcmp.%m2\\t%1,%0 + cmp.%m2\\t%0,%1" + [(set_attr "type" "cmp") + (set_attr "length" "4,4,4,8,8")]) + +(define_insn "*cmp_zn" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN (match_operand:GPI 0 "register_operand" "q,r") + (const_int 0)))] + "" + "tst%?\\t%0,%0" + [(set_attr "type" "tst") + (set_attr "iscompact" "maybe,no") + (set_attr "length" "*,4")]) + +(define_insn "*cmp_znce" + [(cond_exec + (match_operator 2 "arc64_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (set 
(reg:CC_ZN CC_REGNUM) + (compare:CC_ZN (match_operand:GPI 0 "register_operand" "r") + (const_int 0))))] + "" + "tst.%m2\\t%0,%0" + [(set_attr "type" "tst") + (set_attr "length" "4")]) + +(define_insn "fcmp" + [(set (reg:CC_FPU CC_REGNUM) + (compare:CC_FPU (match_operand:GPF_HF 0 "register_operand" "w") + (match_operand:GPF_HF 1 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "fcmp\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fcmp")]) + +(define_insn "fcmpf" + [(set (reg:CC_FPUE CC_REGNUM) + (compare:CC_FPUE (match_operand:GPF_HF 0 "register_operand" "w") + (match_operand:GPF_HF 1 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "fcmpf\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fcmp")]) + +;; ------------------------------------------------------------------- +;; Store-flag and conditional select insns +;; ------------------------------------------------------------------- + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "arc64_comparison_operator" + [(match_operand:GPI 2 "nonmemory_operand") + (match_operand:GPI 3 "nonmemory_operand")]))] + "" + { + if (!register_operand (operands[2], mode)) + operands[2] = force_reg (mode, operands[2]); + if (!arc64_nonmem_operand (operands[3], mode)) + operands[3] = force_reg (mode, operands[3]); + }) + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "arc64_comparison_operator" + [(match_operand:GPF_HF 2 "register_operand") + (match_operand:GPF_HF 3 "register_operand")]))] + "ARC64_HAS_FP_BASE" + " + operands[2] = arc64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_insn_and_split "*scc_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "arc64_comparison_operator" + [(reg CC_REGNUM) (const_int 0)]))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (const_int 0)) + (cond_exec + (match_dup 1) + (set (match_dup 0) (const_int 1)))] +{ + operands[1] + = gen_rtx_fmt_ee (GET_CODE (operands[1]), + VOIDmode, + XEXP (operands[1], 0), XEXP (operands[1], 1)); +} + [(set_attr "type" "movecc")]) + +;; SETcc instructions +(define_expand "set" + [(set (match_operand:SI 0 "register_operand") + (ALLCC:SI + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "nonmemory_operand")))] + "" + { + if (!arc64_nonmem_operand (operands[2], mode)) + operands[2] = force_reg (mode, operands[2]); + }) + +(define_insn "*set" + [(set (match_operand:SI 0 "register_operand" "=r, r, r,r") + (SETCC:SI + (match_operand:GPI 1 "register_operand" "r, r, 0,r") + (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,n")))] + "" + "set%?\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "setcc")]) + +(define_insn "*set_cmp" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "register_operand" "r, r, 0,r") + (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,n"))) + (set (match_operand:SI 0 "register_operand" "=r, r, r,r") + (SETCC:SI (match_dup 1) (match_dup 2)))] + "" + "set.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "setcc")]) + +;; Special cases of SETCC +(define_insn_and_split "*sethi" + [(set (match_operand:SI 0 "register_operand" "=r, r,r") + (gtu:SI + (match_operand:GPI 1 "register_operand" "r, r,r") + (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06M1,n")))] + "" + "setlo%?\\t%0,%2,%1" + "reload_completed + && CONST_INT_P (operands[2]) + && satisfies_constraint_U06M1 (operands[2])" + 
[(const_int 0)] + "{ + /* sethi a,b,u6 => seths a,b,u6 + 1. */ + operands[2] = GEN_INT (INTVAL (operands[2]) + 1); + emit_insn (gen_setgeu (operands[0], operands[1], operands[2])); + DONE; + }" + [(set_attr "length" "4,4,8") + (set_attr "type" "setcc")]) + +(define_insn_and_split "*setls" + [(set (match_operand:SI 0 "register_operand" "=r, r,r") + (leu:SI + (match_operand:GPI 1 "register_operand" "r, r,r") + (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06M1,n")))] + "" + "seths%?\\t%0,%2,%1" + "reload_completed + && satisfies_constraint_U06M1 (operands[2])" + [(const_int 0)] + "{ + /* setls a,b,u6 => setlo a,b,u6 + 1. */ + operands[2] = GEN_INT (INTVAL (operands[2]) + 1); + emit_insn (gen_setltu (operands[0], operands[1], operands[2])); + DONE; + }" + [(set_attr "length" "4,4,8") + (set_attr "type" "setcc")]) + +;; MOVCC patterns +(define_expand "movcc" + [(set (match_operand:ALLI 0 "register_operand") + (if_then_else:ALLI (match_operand 1 "arc64_comparison_operator") + (match_operand:ALLI 2 "register_operand") + (match_operand:ALLI 3 "register_operand")))] + "" + { + rtx tmp; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); + }) + +(define_expand "movcc" + [(set (match_operand:GPF_HF 0 "register_operand") + (if_then_else:GPF_HF (match_operand 1 "arc64_comparison_operator") + (match_operand:GPF_HF 2 "register_operand") + (match_operand:GPF_HF 3 "register_operand")))] + "" + { + rtx tmp; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); + }) + +(define_insn "*cmov" + [(set (match_operand:ALLI 0 "register_operand" "=r,r,r,r") + (if_then_else:ALLI + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:ALLI 1 "nonmemory_operand" "0,0,rU06S0,S32S0") + (match_operand:ALLI 2 "nonmemory_operand" "rU06S0,S32S0,0,0") + ))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + mov.%M3\\t%0,%2 + mov.%M3\\t%0,%2 + mov.%m3\\t%0,%1 + mov.%m3\\t%0,%1" + [(set_attr "length" "4,8,4,8") + (set_attr "type" "move")]) + +(define_insn "*cmov" + [(set (match_operand:HF_SF 0 "register_operand" "=w,*r,*r,w,*r,*r") + (if_then_else:HF_SF + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:HF_SF 1 "nonmemory_operand" "w,*r,*E,0, 0, 0") + (match_operand:HF_SF 2 "nonmemory_operand" "0, 0, 0,w,*r,*E")))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + fmov.%m3\\t%0,%1 + mov.%m3\\t%0,%1 + mov.%m3\\t%0,%1 + fmov.%M3\\t%0,%2 + mov.%M3\\t%0,%2 + mov.%M3\\t%0,%2" + [(set_attr "length" "4,4,8,4,4,8") + (set_attr "type" "fmov,move,move,fmov,move,move")]) + +(define_insn "*cmovdf" + [(set (match_operand:DF 0 "register_operand" "=w,*r,w,*r") + (if_then_else:DF + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "register_operand" "w,*r,0, 0") + (match_operand:DF 2 "register_operand" "0, 0,w,*r")))] + "ARC64_HAS_FPUD" + "@ + fdmov.%m3\\t%0,%1 + movl.%m3\\t%0,%1 + fdmov.%M3\\t%0,%2 + movl.%M3\\t%0,%2" + [(set_attr "length" "4") + (set_attr "type" 
"fmov,move,fmov,move")]) + +;; ------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------- + +(define_expand "3" + [(set (match_operand:GPI 0 "register_operand") + (LOGIC:GPI (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "nonmemory_operand")))] + "" + { + if (!arc64_nonmem_operand (operands[2], mode)) + operands[2] = force_reg (mode, operands[2]); + }) + +(define_expand "2" + [(set (match_operand:GPI 0 "register_operand") + (NOT_ABS:GPI (match_operand:GPI 1 "register_operand")))] + "" + ) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=q,q,r,r") + (neg:SI (match_operand:SI 1 "register_operand" "0,q,0,r")))] + "" + "neg%?\\t%0,%1" + [(set_attr "type" "neg") + (set_attr "iscompact" "maybe,yes,no,no") + (set_attr "predicable" "yes,no,yes,no") + (set_attr "length" "*,2,4,4")]) + +(define_insn "*2" + [(set (match_operand:GPI 0 "register_operand" "=q,r") + (NOT_ABS:GPI (match_operand:GPI 1 "register_operand" "q,r")))] + "" + "%?\\t%0,%1" + [(set_attr "type" "") + (set_attr "iscompact" "maybe,no") + (set_attr "length" "*,4")]) + +(define_insn "*3" + [(set (match_operand:GPI 0 "register_operand" "=r, r, r,r") + (MINMAX:GPI (match_operand:GPI 1 "register_operand" "%0, 0, r,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S12S0,rU06S0,S32S0")))] + "" + "%?\\t%0,%1,%2" + [(set_attr "type" "") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes,no,no,no")] +) + +;; Zero-extend pattern +(define_insn "*si_zextend" + [(set (match_operand:DI 0 "register_operand" "=q,r") + (zero_extend:DI + (LOP2EX:SI (match_operand:SI 1 "register_operand" "q,r"))))] + "TARGET_64BIT" + "%?\\t%0,%1" + [(set_attr "type" "") + (set_attr "iscompact" "yes,no") + (set_attr "length" "*,4")]) + +(define_insn "*3_zextend" + [(set (match_operand:DI 0 "register_operand" "=r, r, r,r") + (zero_extend:DI + (MINMAX:SI + (match_operand:SI 1 "register_operand" "%0, 0, r,r") + (match_operand:SI 2 "nonmemory_operand" "rU06S0,S12S0,rU06S0,S32S0"))))] + "TARGET_64BIT" + "%?\\t%0,%1,%2" + [(set_attr "type" "max") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes,no,no,no")]) + +;; NEGCC and NOTCC patterns used by ifcvt. 
+(define_expand "cc" + [(set (match_operand:GPI 0 "register_operand") + (if_then_else:GPI (match_operand 1 "arc64_comparison_operator") + (NEG_NOT:GPI (match_operand:GPI 2 "register_operand")) + (match_operand:GPI 3 "register_operand")))] + "" + { + rtx tmp; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); + }) + +(define_insn "*cneg" + [(set (match_operand:GPI 0 "register_operand" "=r,r,r") + (if_then_else:GPI + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (neg:GPI (match_operand:GPI 1 "register_operand" "0,0,0")) + (match_operand:GPI 2 "nonmemory_operand" "0,rU06S0,S32S0")))] + "" + "@ + rsub.%m3\\t%0,%1,0 + rsub.%m3\\t%0,%1,0\\n\\tmov.%M3\\t%0,%2 + rsub.%m3\\t%0,%1,0\\n\\tmov.%M3\\t%0,%2" + [(set_attr "length" "4,8,12") + (set_attr "type" "neg")]) + +(define_insn "*cnot" + [(set (match_operand:GPI 0 "register_operand" "=r,r,r") + (if_then_else:GPI + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (not:GPI (match_operand:GPI 1 "register_operand" "0,0,0")) + (match_operand:GPI 2 "register_operand" "0,rU06S0,S32S0")))] + "" + "@ + xor.%m3\\t%0,%1,-1 + xor.%m3\\t%0,%1,-1\\n\\tmov.%M3\\t%0,%2 + xor.%m3\\t%0,%1,-1\\n\\tmov.%M3\\t%0,%2" + [(set_attr "length" "8,12,16") + (set_attr "type" "xor")]) + +;; ------------------------------------------------------------------- +;; Shifts +;; ------------------------------------------------------------------- + +;; FIXME! check if we get better code if we use QI for op 2. +(define_expand "3" + [(set (match_operand:GPI 0 "register_operand") + (ASHIFT:GPI (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "nonmemory_operand")))] + "") + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "register_operand") + (rotatert:SI (match_operand:SI 1 "nonmemory_operand") + (match_operand:SI 2 "nonmemory_operand")))] + "") + +(define_insn "*rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=r, r, r, r,r") + (rotatert:SI (match_operand:SI 1 "nonmemory_operand" "r, r, r, r,i") + (match_operand:SI 2 "nonmemory_operand" "U0001,U0008,U0016,rU06S0,r")))] + ;; FIXME! 
this needs BARREL_SHIFTER option + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode)" + "@ + ror\\t%0,%1 + ror8\\t%0,%1 + swap\\t%0,%1 + ror\\t%0,%1,%2 + ror\\t%0,%1,%2" + [(set_attr "type" "ror,ror,swap,ror,ror") + (set_attr "length" "4,4,4,4,8")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "register_operand") + (rotatert:SI (match_operand:SI 1 "nonmemory_operand") + (match_operand:SI 2 "nonmemory_operand")))] + "" + " + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 1)) + { + gen_rotl1 (operands[0], operands[1]); + DONE; + } + + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 8)) + { + gen_rotl8 (operands[0], operands[1]); + DONE; + } + + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32); + else + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2])); + operands[2] = reg; + } + ") + +(define_insn "rotl1" + [(set (match_operand:SI 0 "register_operand" "= r,r") + (rotate:SI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i") + (const_int 1)))] + "" + "rol%?\\t%0,%1" + [(set_attr "type" "rol") + (set_attr "predicable" "no") + (set_attr "length" "4,8")]) + +(define_insn "rotl8" + [(set (match_operand:SI 0 "register_operand" "= r,r") + (rotate:SI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i") + (const_int 8)))] + "" + "rol8%?\\t%0,%1" + [(set_attr "type" "rol") + (set_attr "predicable" "no") + (set_attr "length" "4,8")]) + + +;; ------------------------------------------------------------------- +;; Bitfields +;; ------------------------------------------------------------------- + +(define_expand "extzv" + [(set (match_operand:GPI 0 "register_operand" "") + (zero_extract:GPI (match_operand:GPI 1 "register_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "") + +(define_insn "*extzvsi" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extract:SI (match_operand:SI 1 "register_operand" "0,r") + (match_operand 2 "const_int_operand" "n,n") + (match_operand 3 "const_int_operand" "n,n")))] + "" + { + int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5) + | (INTVAL (operands[3]) & 0x1f); + operands[2] = GEN_INT (assemble_op2); + return "xbfu%?\\t%0,%1,%2"; + } + [(set_attr "type" "xbfu") + (set_attr "iscompact" "no") + (set_attr "length" "4,8") + (set_attr "predicable" "no")]) + +(define_insn "*zextzvsi" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extract:DI (match_operand:SI 1 "register_operand" "0,r") + (match_operand 2 "const_int_operand" "n,n") + (match_operand 3 "const_int_operand" "n,n")))] + "" + { + int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5) + | (INTVAL (operands[3]) & 0x1f); + operands[2] = GEN_INT (assemble_op2); + return "xbfu%?\\t%0,%1,%2"; + } + [(set_attr "type" "xbfu") + (set_attr "iscompact" "no") + (set_attr "length" "4,8") + (set_attr "predicable" "no")]) + +;;FIXME! compute length based on the input args. 
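+;; For reference, the operand packing used by these extraction
+;; patterns: the assembler operand is ((width - 1) << shift) | position,
+;; with 5-bit fields for the 32-bit forms above and 6-bit fields for
+;; the 64-bit form below.  Worked example for the SI case: extracting
+;; 8 bits starting at bit 4 packs ((8 - 1) << 5) | 4 = 228, i.e.
+;;   xbfu  r0,r1,228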
+(define_insn "*extzvdi" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extract:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand 2 "const_int_operand" "n,n") + (match_operand 3 "const_int_operand" "n,n")))] + "" + { + int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x3f) << 6) + | (INTVAL (operands[3]) & 0x3f); + operands[2] = GEN_INT (assemble_op2); + return "xbful%?\\t%0,%1,%2"; + } + [(set_attr "type" "xbfu") + (set_attr "iscompact" "no") + (set_attr "length" "8,8") + (set_attr "predicable" "no")]) + +(define_insn "*extzvsi_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (zero_extract:SI + (match_operand:SI 1 "register_operand" "0,r") + (match_operand 2 "const_int_operand" "n,n") + (match_operand 3 "const_int_operand" "n,n")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extract:SI (match_dup 1) + (match_dup 2) + (match_dup 3)))] + "" + { + int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5) + | (INTVAL (operands[3]) & 0x1f); + operands[2] = GEN_INT (assemble_op2); + return "xbfu.f\\t%0,%1,%2"; + } + [(set_attr "type" "xbfu") + (set_attr "length" "4,8")]) + +(define_insn "*extzvsi_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (zero_extract:SI + (match_operand:SI 0 "register_operand" "r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (const_int 0)))] + "" + { + int assemble_op2 = (((INTVAL (operands[1]) - 1) & 0x1f) << 5) + | (INTVAL (operands[2]) & 0x1f); + operands[1] = GEN_INT (assemble_op2); + return "xbfu.f\\t0,%0,%1"; + } + [(set_attr "type" "xbfu") + (set_attr "length" "8")]) + +(define_insn "bswap2" + [(set (match_operand:GPI 0 "register_operand" "=r,r") + (bswap:GPI + (match_operand:GPI 1 "nonmemory_operand" "rU06S0,S32S0")))] + "" + "swape\\t%0,%1" + [(set_attr "length" "4,8") + (set_attr "type" "swap")]) + +;; ------------------------------------------------------------------- +;; Bitscan +;; ------------------------------------------------------------------- + +(define_insn "clrsb2" + [(set (match_operand:EPI 0 "register_operand" "=r") + (clrsb:EPI (match_operand:EPI 1 "register_operand" "r")))] + "TARGET_BITSCAN" + "norm\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "norm")]) + +(define_expand "clz2" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] + "TARGET_BITSCAN" + { + rtx tmp = gen_reg_rtx (mode); + unsigned int size = GET_MODE_SIZE (mode) * BITS_PER_UNIT - 1; + emit_insn (gen_arc64_fls2 (tmp, operands[1])); + emit_insn (gen_sub3 (operands[0], GEN_INT (size), tmp)); + DONE; + }) + +(define_insn "ctz2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))] + "TARGET_BITSCAN" + "ffs\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "ffs")]) + +(define_insn "arc64_fls2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] + ARC64_UNSPEC_FLS))] + "TARGET_BITSCAN" + "fls\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fls")]) + +;; ------------------------------------------------------------------- +;; Floating-point intrinsics +;; ------------------------------------------------------------------- + +(define_insn "round2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + ARC64_UNSPEC_ROUND))] + "ARC64_HAS_FP_BASE" + "frnd\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "frnd")]) + 
+(define_insn "btrunc2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + ARC64_UNSPEC_BTRUNC))] + "ARC64_HAS_FP_BASE" + "frnd_rz\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "frnd")]) + +;; ------------------------------------------------------------------- +;; Floating-point conversions +;; ------------------------------------------------------------------- + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "register_operand" "w")))] + "ARC64_HAS_FPUD" + "fs2d\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fs2d")]) + +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "register_operand" "=w") + (float_extend:SF (match_operand:HF 1 "register_operand" "w")))] + "ARC64_HAS_FPUH" + "fh2s\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fh2s")]) + +(define_expand "extendhfdf2" + [(match_operand:DF 0 "register_operand") + (match_operand:HF 1 "register_operand")] + "ARC64_HAS_FPUS" + { + rtx tmp = gen_reg_rtx (SFmode); + emit_insn (gen_extendhfsf2 (tmp, operands[1])); + if (ARC64_HAS_FPUD) + emit_insn (gen_extendsfdf2 (operands[0], tmp)); + else + { + rtx ret; + ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, + "__extendsfdf2"), + operands[0], LCT_NORMAL, DFmode, + tmp, SFmode); + if (ret != operands[0]) + emit_move_insn (operands[0], ret); + } + DONE; + }) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=w") + (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))] + "ARC64_HAS_FPUD" + "fd2s\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fd2s")]) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "register_operand" "=w") + (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))] + "ARC64_HAS_FPUH" + "fs2h\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fs2h")]) + +(define_expand "truncdfhf2" + [(match_operand:HF 0 "register_operand") + (match_operand:DF 1 "register_operand")] + "ARC64_HAS_FPUS" + { + rtx tmp = gen_reg_rtx (SFmode); + if (ARC64_HAS_FPUD) + emit_insn (gen_truncdfsf2 (tmp, operands[1])); + else + { + rtx ret; + ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, + "__truncdfsf2"), + tmp, LCT_NORMAL, SFmode, + operands[1], DFmode); + if (ret != tmp) + emit_move_insn (tmp, ret); + } + emit_insn (gen_truncsfhf2 (operands[0], tmp)); + DONE; + }) + +;; SI->SF SI->DF DI->SF DI->DF +;; FINT2S FINT2D FL2S FL2D +(define_insn "float2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (float:GPF (match_operand:GPI 1 "core_register_operand" "c")))] + "ARC64_HAS_FP_BASE" + "f2\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "int2fp")]) + +(define_expand "floatsihf2" + [(match_operand:HF 0 "register_operand") + (match_operand:SI 1 "core_register_operand")] + "ARC64_HAS_FPUH" + { + rtx tmp = gen_reg_rtx (SFmode); + emit_insn (gen_floatsisf2 (tmp, operands[1])); + emit_insn (gen_truncsfhf2 (operands[0], tmp)); + DONE; + }) + +(define_expand "floatdihf2" + [(match_operand:HF 0 "register_operand") + (match_operand:DI 1 "core_register_operand")] + "ARC64_HAS_FPUH" + { + rtx tmp = gen_reg_rtx (SFmode); + emit_insn (gen_floatdisf2 (tmp, operands[1])); + emit_insn (gen_truncsfhf2 (operands[0], tmp)); + DONE; + }) + +;; uSI->SF uSI->DF uDI->SF uDI->DF +;; FUINT2S FUINT2D FUL2S FUL2D +(define_insn "floatuns2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unsigned_float:GPF (match_operand:GPI 1 "core_register_operand" "c")))] + 
"ARC64_HAS_FP_BASE" + "fu2\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "uint2fp")]) + +;; SF->uSI SF->uDI DF->uSI DF->uDI (using rounding towards zero) +;; FS2UINT_RZ FS2UL_RZ FD2UINT_RZ FD2UL_RZ +(define_insn "fixuns_trunc2" + [(set (match_operand:GPI 0 "core_register_operand" "=c") + (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "f2u_rz\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fp2uint")]) + +;; SF->SI SF->DI DF->SI DF->DI (using rounding towards zero) +;; FS2INT_RZ FS2L_RZ FD2INT_RZ FD2L_RZ +(define_insn "fix_trunc2" + [(set (match_operand:GPI 0 "core_register_operand" "=c") + (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "f2_rz\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fp2int")]) + +;; ------------------------------------------------------------------- +;; Floating-point arithmetic +;; ------------------------------------------------------------------- + +;; F

ADD  FSUB  FMUL  FDIV  FMIN  F
MAX +(define_insn "3" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (DOPF:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "f\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "f")]) + +;; F

ABS +;; FIXME! bclr can be short. Also we can predicate it +(define_insn "abs2" + [(set (match_operand:GPF_HF 0 "register_operand" "=w,*r") + (abs:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w,*r")))] + "" + "@ + fsgnjx\\t%0,%1,%1 + bclr\\t%0,%1," + [(set_attr "length" "4") + (set_attr "type" "fsgnjx,bclr")]) + +;; F

NEG +;; FIXME! bxor can be predicated +(define_insn "neg2" + [(set (match_operand:GPF_HF 0 "register_operand" "=w,*r") + (neg:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w,*r")))] + "" + "@ + fsgnjn\\t%0,%1,%1 + bxor\\t%0,%1," + [(set_attr "length" "4") + (set_attr "type" "fsgnjn,bxor")]) + +;; F

MADD +(define_insn "fma4" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (fma:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w") + (match_operand:GPF_HF 3 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "fmadd\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fmadd")]) + +;; F

MSUB +(define_insn "fnma4" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (fma:GPF_HF (neg:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w")) + (match_operand:GPF_HF 2 "register_operand" "w") + (match_operand:GPF_HF 3 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "fmsub\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fmsub")]) + +;; F

NMSUB +;; TBI: the md.texi says Like @code{fma@var{m}4}, except +;; operand 3 subtracted from the product instead of added to the +;; product. However, fnmsub does -(s3 - (s1 * s2)) +(define_insn "fms4" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (fma:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w") + (neg:GPF_HF (match_operand:GPF_HF 3 "register_operand" "w"))))] + "!HONOR_SIGNED_ZEROS (mode) && ARC64_HAS_FP_BASE" + "fnmsub\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmsub")]) + +;; -(op3 - (op1 * op2)) +(define_insn "*nfnms4" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (neg:GPF_HF (fma:GPF_HF (neg:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w")) + (match_operand:GPF_HF 2 "register_operand" "w") + (match_operand:GPF_HF 3 "register_operand" "w"))))] + "ARC64_HAS_FP_BASE" + "fnmsub\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmsub")]) + +;; F

NMADD +;; Likewise like above +(define_insn "fnms4" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (fma:GPF_HF (neg:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w")) + (match_operand:GPF_HF 2 "register_operand" "w") + (neg:GPF_HF (match_operand:GPF_HF 3 "register_operand" "w"))))] + "!HONOR_SIGNED_ZEROS (mode) && ARC64_HAS_FP_BASE" + "fnmadd\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmadd")]) + +;; -(op3 + (op1 * op2)) +(define_insn "*nfms4" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (neg:GPF_HF (fma:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w") + (match_operand:GPF_HF 3 "register_operand" "w"))))] + "ARC64_HAS_FP_BASE" + "fnmadd\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmadd")]) + +;; F

SQRT +(define_insn "sqrt2" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (sqrt:GPF_HF (match_operand:GPF_HF 1 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "fsqrt\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fsqrt")]) + +;; F

SGNJ +(define_insn "copysign3" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (unspec:GPF_HF [(match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w")] + ARC64_UNSPEC_COPYSIGN))] + "ARC64_HAS_FP_BASE" + "fsgnj\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "fsgnj")]) + +;; F

SGNJX +(define_insn "xorsign3" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (unspec:GPF_HF [(match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w")] + ARC64_UNSPEC_XORSIGN))] + "ARC64_HAS_FP_BASE" + "fsgnjx\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "fsgnjx")]) + +;; F

SGNJN +(define_insn "*ncopysign3" + [(set (match_operand:GPF_HF 0 "register_operand" "=w") + (neg:GPF_HF (unspec:GPF_HF + [(match_operand:GPF_HF 1 "register_operand" "w") + (match_operand:GPF_HF 2 "register_operand" "w")] + ARC64_UNSPEC_COPYSIGN)))] + "ARC64_HAS_FP_BASE" + "fsgnjn\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "fsgnjn")]) + +;; ------------------------------------------------------------------- +;; Builtins +;; ------------------------------------------------------------------- + +(define_insn "lr" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (unspec_volatile:SI + [(match_operand:SI 1 "nonmemory_operand" "U06S0,S12S0,r,i")] + ARC64_VUNSPEC_LR))] + "" + "lr\\t%0,[%1]" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "lr")]) + +(define_insn "sr" + [(unspec_volatile + [(match_operand:SI 0 "register_operand" " r, r, r, r") + (match_operand:SI 1 "nonmemory_operand" "U06S0,S12S0, i, r")] + ARC64_VUNSPEC_SR)] + "" + "sr\\t%0,[%1]" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "sr")]) + +(define_insn "lrl" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (unspec_volatile:DI + [(match_operand:DI 1 "nonmemory_operand" "U06S0,S12S0,r,i")] + ARC64_VUNSPEC_LRL))] + "" + "lrl\\t%0,[%1]" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "lr")]) + +(define_insn "srl" + [(unspec_volatile + [(match_operand:DI 0 "register_operand" " r, r, r, r") + (match_operand:DI 1 "nonmemory_operand" "U06S0,S12S0, i, r")] + ARC64_VUNSPEC_SRL)] + "" + "srl\\t%0,[%1]" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "sr")]) + +(define_insn "flag" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "U06S0,S12S0,r,i")] + ARC64_VUNSPEC_FLAG)] + "" + "@ + flag%?\\t%0 + flag\\t%0 + flag%?\\t%0 + flag%?\\t%0" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "flag") + (set_attr "predicable" "yes,no,yes,yes")]) + +(define_insn "brk" + [(unspec_volatile [(const_int 0)] ARC64_VUNSPEC_BRK)] + "" + "brk" + [(set_attr "length" "4") + (set_attr "type" "brk")]) + +(define_insn "nopv" + [(unspec_volatile [(const_int 0)] ARC64_VUNSPEC_NOP)] + "" + "nop_s" + [(set_attr "type" "nop") + (set_attr "length" "2")]) + + +;; For thread pointer builtins +(define_expand "get_thread_pointer" + [(set (match_operand:P 0 "register_operand") (match_dup 1))] + "" + "operands[1] = gen_rtx_REG (Pmode, R30_REGNUM);") + +(define_expand "set_thread_pointer" + [(set (match_dup 1) (match_operand:P 0 "register_operand"))] + "" + "operands[1] = gen_rtx_REG (Pmode, R30_REGNUM);") + +(define_insn "sync" + [(unspec_volatile [(const_int 1)] + ARC64_VUNSPEC_SYNC)] + "" + "sync" + [(set_attr "length" "4") + (set_attr "type" "sync")]) + +(include "arith.md") +(include "atomic.md") +(include "arc32.md") +(include "condexec.md") + +;; mode:emacs-lisp +;; comment-start: ";; " +;; eval: (set-syntax-table (copy-sequence (syntax-table))) +;; eval: (modify-syntax-entry ?[ "(]") +;; eval: (modify-syntax-entry ?] ")[") +;; eval: (modify-syntax-entry ?{ "(}") +;; eval: (modify-syntax-entry ?} "){") +;; eval: (setq indent-tabs-mode t) +;; End: diff --git a/gcc/config/arc64/arc64.opt b/gcc/config/arc64/arc64.opt new file mode 100644 index 0000000000000..6c60a513dcb21 --- /dev/null +++ b/gcc/config/arc64/arc64.opt @@ -0,0 +1,132 @@ +; Machine description for ARC64 architecture. +; Copyright (C) 2021 Free Software Foundation, Inc. +; +; This file is part of GCC. 
+; +; GCC is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published by +; the Free Software Foundation; either version 3, or (at your option) +; any later version. +; +; GCC is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/arc64/arc64-opts.h + +TargetVariable +bool arc64_target_64bit = ARC64_64BIT_DEFAULT + +Enum +Name(cmodel) Type(enum arc64_code_model) +The code model option names for -mcmodel: + +EnumValue +Enum(cmodel) String(small) Value(ARC64_CMODEL_SMALL) + +EnumValue +Enum(cmodel) String(medium) Value(ARC64_CMODEL_MEDIUM) + +EnumValue +Enum(cmodel) String(large) Value(ARC64_CMODEL_LARGE) + +mcmodel= +Target RejectNegative Joined Enum(cmodel) Var(arc64_cmodel_var) Init(ARC64_CMODEL_MEDIUM) Save +Specify the code model. + +Enum +Name(arc64_atomic) Type(int) + +EnumValue +Enum(arc64_atomic) String(0) Value(0) + +EnumValue +Enum(arc64_atomic) String(1) Value(1) + +EnumValue +Enum(arc64_atomic) String(2) Value(2) + +EnumValue +Enum(arc64_atomic) String(3) Value(3) + +matomic= +Target RejectNegative Joined Enum(arc64_atomic) Var(arc64_atomic_option) Init(DEFAULT_ARC64_ATOMIC_OPTION) +Enable atomic instructions: {0, 1, 2, 3}. + +Enum +Name(fpmodel) Type(int) + +EnumValue +Enum(fpmodel) String(none) Value(0) + +EnumValue +Enum(fpmodel) String(fpus) Value(1) + +EnumValue +Enum(fpmodel) String(fpud) Value(2) + +mfpu= +Target RejectNegative Joined Enum(fpmodel) Var(arc64_fp_model) Init(0) +Specify hardware FP model used. + +mdiv-rem +Target Var(TARGET_ARC64_DIVREM) Init(TARGET_ARC64_DIVREM_DEFAULT) +Enable DIV-REM instructions. + +mbitscan +Target Mask(BITSCAN) +Enable NORM, NORMH, FFS, FLS, NORML, FFSL, and FLSL bitscan instructions. + +mcpu= +Target RejectNegative ToLower Joined Var(arcv3_cpu_string) +-mcpu=CPU Generate code for specific ARCv3 CPU variant. + +munaligned-access +Target Var(unaligned_access) Init(UNALIGNED_ACCESS_DEFAULT) +Enable unaligned accesse for packed data. + +mvolatile-di +Target Mask(VOLATILE_DI) +Enable uncached access for volatile memories. + +mcode-density +Target Mask(CODE_DENSITY) +Enable code-density instructions. + +msimd +Target Mask(SIMD) +Enable integer SIMD instructions. + +mwide +Target Mask(WIDE_SIMD) +Enable wide floating point SIMD support. + +mll64 +Target Mask(LL64) +Enable double load/store instructions for arc64:32. + +m128 +Target Mask(WIDE_LDST) +Enable wide data transfer support. + +mfpmov +Target Mask(FP_MOVE) +Reduce pressure on GPRs by using FPRs for memory operations like memcpy. + +mbrcc +Target Mask(BRCC) +Generate BRcc instructions during combiner step. + +mbbit +Target Mask(BBIT) +Generate BBITx instructions during combiner step. + +mexperimental +Target Mask(EXP) +Experimental option, to be removed. 
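(Annotation, not part of the patch.)  To make the connection between these options and the machine description concrete: the Mask(BITSCAN) entry is what provides the TARGET_BITSCAN flag guarding the clrsb/clz/ctz patterns earlier in arc64.md, so with -mbitscan in effect (or whatever the selected -mcpu= defaults to) a sketch like the one below should compile to norm / fls / ffs sequences instead of libgcc calls.  The function names and the driver invocation are made up for the example:

  /* Hypothetical example; assumes an arc64 compiler built from this
     patch, invoked roughly as "arc64-elf-gcc -O2 -mbitscan test.c".  */
  int sign_bits (int x)           { return __builtin_clrsb (x); }  /* expect: norm      */
  int leading_zeros (unsigned x)  { return __builtin_clz (x);   }  /* expect: fls + sub */
  int trailing_zeros (unsigned x) { return __builtin_ctz (x);   }  /* expect: ffs       */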
diff --git a/gcc/config/arc64/arith.md b/gcc/config/arc64/arith.md new file mode 100644 index 0000000000000..60ba93f811a6d --- /dev/null +++ b/gcc/config/arc64/arith.md @@ -0,0 +1,2963 @@ +;; SI instructions having short instruction variant +(define_insn "*_insn" + [(set ( match_operand:GPI 0 "register_operand" "=q,q, r, r, r, r, r, r,r") + (COMMUTATIVE:GPI (match_operand:GPI 1 "nonmemory_operand" "%0,q, 0, 0, r,U06S0,S12S0,S32S0,r") + (match_operand:GPI 2 "nonmemory_operand" " q,0,rU06S0,S12S0,rU06S0, r, 0, r,S32S0")))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "@ + %?\\t%0,%1,%2 + %?\\t%0,%2,%1 + %?\\t%0,%1,%2 + %?\\t%0,%1,%2 + %?\\t%0,%1,%2 + %?\\t%0,%2,%1 + %?\\t%0,%2,%1 + %?\\t%0,%1,%2 + %?\\t%0,%1,%2" + [(set_attr "iscompact" "maybe,maybe,no,no,no,no,no,no,no") + (set_attr "length" "*,*,4,4,4,4,4,8,8") + (set_attr "type" "")] + ) + +;; The zero extend variant of the above +(define_insn "*si3_zextend" + [(set (match_operand:DI 0 "register_operand" "=q,q, r, r, r, r, r, r,r") + (zero_extend:DI + (COMMUTATIVE:SI + (match_operand:SI 1 "nonmemory_operand" "%0,q, 0, 0, r,U06S0,S12S0,S32S0,r") + (match_operand:SI 2 "nonmemory_operand" " q,0,rU06S0,S12S0,rU06S0, r, 0, r,S32S0"))))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + %?\\t%0,%1,%2 + %?\\t%0,%2,%1 + %?\\t%0,%1,%2 + %?\\t%0,%1,%2 + %?\\t%0,%1,%2 + %?\\t%0,%2,%1 + %?\\t%0,%2,%1 + %?\\t%0,%1,%2 + %?\\t%0,%1,%2" + [(set_attr "iscompact" "maybe,maybe,no,no,no,no,no,no,no") + (set_attr "length" "*,*,4,4,4,4,4,8,8") + (set_attr "type" "")] + ) + +(define_insn "*_insn" + [(set ( match_operand:GPI 0 "register_operand" "=q, r, r, r, r,r") + (ASHIFT:GPI (match_operand:GPI 1 "nonmemory_operand" " 0, 0, 0, r,S32S0,r") + (match_operand:GPI 2 "nonmemory_operand" " q,rU06S0,S12S0,rU06S0, r,S32S0")))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "%?\\t%0,%1,%2" + [(set_attr "iscompact" "maybe,no,no,no,no,no") + (set_attr "length" "*,4,4,4,8,8") + (set_attr "type" "")]) + +(define_insn "*zsidi_insn" + [(set (match_operand:DI 0 "register_operand" "=q, r, r, r, r,r") + (zero_extend:DI + (ASHIFT:SI + (match_operand:SI 1 "nonmemory_operand" " 0, 0, 0, r,S32S0,r") + (match_operand:SI 2 "nonmemory_operand" " q,rU06S0,S12S0,rU06S0, r,S32S0"))))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "%?\\t%0,%1,%2" + [(set_attr "iscompact" "yes,no,no,no,no,no") + (set_attr "length" "*,4,4,4,8,8") + (set_attr "type" "")]) + +(define_insn "*_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" " 0,0") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S32S0")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "= r,r") + (ASHIFT:GPI (match_dup 1) (match_dup 2)))] + "" + ".f\\t%0,%1,%2" + [(set_attr "iscompact" "no") + (set_attr "length" "4,8") + (set_attr "type" "")]) + +(define_insn "*_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (ASHIFT:GPI + (match_operand:GPI 0 "register_operand" " r,r") + (match_operand:GPI 1 "nonmemory_operand" "rU06S0,S32S0")) + (const_int 0)))] + "" + ".f\\t0,%0,%1" + [(set_attr "iscompact" "no") + (set_attr "length" "4,8") + (set_attr "type" "")]) + + +(define_insn "*sub_insn" + [(set ( match_operand:GPI 0 "register_operand" "=q, q, r, r, r, r, r, r, r,r") + (minus:GPI (match_operand:GPI 1 "nonmemory_operand" " 0, 0, 0,rU06S0, 0, r,U06S0,S12S0,S32S0,r") + (match_operand:GPI 2 
"nonmemory_operand" " q,U05S0,rU06Sx, 0,S12S0,rU06Sx, r, 0, r,S32S0")))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "@ + sub%?\\t%0,%1,%2 + sub%?\\t%0,%1,%2 + sub%s2%?\\t%0,%1,%S2 + rsub%?\\t%0,%2,%1 + sub%?\\t%0,%1,%2 + sub%s2%?\\t%0,%1,%S2 + rsub%?\\t%0,%2,%1 + rsub%?\\t%0,%2,%1 + sub%?\\t%0,%1,%2 + sub%?\\t%0,%1,%2" + [(set_attr "iscompact" "yes,maybe,no,no,no,no,no,no,no,no") + (set_attr "length" "2,*,4,4,4,4,4,4,8,8") + (set_attr "type" "sub")] + ) + +;; zero extend of the above +(define_insn "*subsi3r_zextend" + [(set (match_operand:DI 0 "register_operand" "=q,r") + (zero_extend:DI + (minus:SI + (match_operand:SI 1 "register_operand" " 0,r") + (match_operand:SI 2 "register_operand" " q,r"))))] + "" + "sub%?\\t%0,%1,%2" + [(set_attr "iscompact" "yes,no") + (set_attr "length" "2,4") + (set_attr "type" "sub")] + ) + +(define_insn "*add_insn" + [(set ( match_operand:GPI 0 "register_operand" "=q, q,q, r, r, r, r, r,r") + (plus:GPI (match_operand:GPI 1 "register_operand" "%0, 0,q, 0, 0, 0, r, r,r") + (match_operand:GPI 2 "nonmemory_operand" " q,qh,q,rU06Sx,N06Sx,S12Sx,rU06Sx,N06Sx,S32S0")))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "@ + add%?\\t%0,%1,%2 + add%?\\t%0,%1,%2 + add%?\\t%0,%1,%2 + add%s2%?\\t%0,%1,%S2 + sub%s2%?\\t%0,%1,%N2 + add%s2%?\\t%0,%1,%S2 + add%s2%?\\t%0,%1,%S2 + sub%s2%?\\t%0,%1,%N2 + add%?\\t%0,%1,%2" + [(set_attr "iscompact" "yes,maybe,maybe,no,no,no,no,no,no") + (set_attr "length" "2,*,*,4,4,4,4,4,8") + (set_attr "type" "add")] + ) + +;; zero extend of the above +(define_insn "*addsi3_zextend" + [(set (match_operand:DI 0 "register_operand" "=q, q,q, r, r, r, r, r,r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "%0, 0,q, 0, 0, 0, r, r,r") + (match_operand:SI 2 "nonmemory_operand" " q,qh,q,rU06Sx,N06Sx,S12Sx,rU06Sx,N06Sx,S32S0"))))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + add%?\\t%0,%1,%2 + add%?\\t%0,%1,%2 + add%?\\t%0,%1,%2 + add%s2%?\\t%0,%1,%S2 + sub%s2%?\\t%0,%1,%N2 + add%s2%?\\t%0,%1,%S2 + add%s2%?\\t%0,%1,%S2 + sub%s2%?\\t%0,%1,%N2 + add%?\\t%0,%1,%2" + [(set_attr "iscompact" "yes,maybe,maybe,no,no,no,no,no,no") + (set_attr "length" "2,*,*,4,4,4,4,4,8") + (set_attr "type" "add")]) + +;; This pattern is needed because the GT (pnz) is not reversible and I +;; cannot convert CCmode to CC_ZNmode. +(define_insn "*3_f" + [(set (reg:CC CC_REGNUM) + (compare:CC + (ADDSUB:GPI + (match_operand:GPI 1 "arc64_nonmem_operand" "0, 0, 0, r,r,S32S0, r") + (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,U06S0,r, r,S32S0")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r, r, r, r,r, r, r") + (ADDSUB:GPI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + ".f\\t%0,%1,%2" + [(set_attr "predicable" "yes,yes,no,no,no,no,no") + (set_attr "length" "4,4,4,4,4,8,8") + (set_attr "type" "")]) + +;; Arithmetic patterns used by the combiner. 
+(define_insn "*bic3" + [(set ( match_operand:GPI 0 "register_operand" "=q,r,r, r") + (and:GPI (not:GPI (match_operand:GPI 1 "register_operand" "q,r,r, r")) + ( match_operand:GPI 2 "nonmemory_operand" "0,0,r,S32S0")))] + "" + "bic%?\\t%0,%2,%1" + [(set_attr "iscompact" "maybe,no,no,no") + (set_attr "predicable" "no,yes,no,no") + (set_attr "length" "*,4,4,8") + (set_attr "type" "bic")]) + +(define_insn "*bic3_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (and:GPI + (not:GPI (match_operand:GPI 1 "register_operand" "r,r, r")) + (match_operand:GPI 2 "nonmemory_operand" "0,r,S32S0")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (and:GPI (not:GPI (match_dup 1)) (match_dup 2)))] + "" + "bic%?.f\\t%0,%2,%1" + [(set_attr "iscompact" "no,no,no") + (set_attr "predicable" "yes,no,no") + (set_attr "length" "4,4,8") + (set_attr "type" "bic")]) + +(define_insn "*bic3_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (and:GPI + (not:GPI (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "nonmemory_operand" "r,S32S0")) + (const_int 0)))] + "" + "bic.f\\t0,%1,%0" + [(set_attr "iscompact" "no,no") + (set_attr "predicable" "no,no") + (set_attr "length" "4,8") + (set_attr "type" "bic")]) + +(define_insn "*3" + [(set (match_operand:GPI 0 "register_operand" "=r,r,r") + (BIT:GPI + (ashift:GPI + (const_int 1) + (match_operand:GPI 1 "register_operand" "r,r,r")) + (match_operand:GPI 2 "nonmemory_operand" "0,r,S32S0")))] + "" + "%?\\t%0,%2,%1" + [(set_attr "type" "") + (set_attr "iscompact" "no") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")]) + +(define_insn "*bset3_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (BIT:GPI + (ashift:GPI + (const_int 1) + (match_operand:GPI 1 "register_operand" "r,r,r")) + (match_operand:GPI 2 "nonmemory_operand" "0,r,S32S0")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (BIT:GPI + (ashift:GPI + (const_int 1) + (match_dup 1)) + (match_dup 2)))] + "" + "%?.f\\t%0,%2,%1" + [(set_attr "type" "") + (set_attr "iscompact" "no") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")]) + +(define_insn "*bset3_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (BIT:GPI + (ashift:GPI + (const_int 1) + (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "nonmemory_operand" "r,S32S0")) + (const_int 0)))] + "" + ".f\\t0,%1,%0" + [(set_attr "type" "") + (set_attr "iscompact" "no") + (set_attr "length" "4,8") + (set_attr "predicable" "no,no")]) + +(define_insn "_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (COMMUTATIVEF:GPI + (match_operand:GPI 1 "nonmemory_operand" "% 0, 0, r,U06S0,S12S0,S32S0,r") + (match_operand:GPI 2 "nonmemory_operand" " rU06S0,S12S0,rU06S0, r, 0, r,S32S0")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "= r, r, r, r, r, r,r") + (COMMUTATIVEF:GPI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "@ + %?.f\\t%0,%1,%2 + %?.f\\t%0,%1,%2 + %?.f\\t%0,%1,%2 + %?.f\\t%0,%2,%1 + %?.f\\t%0,%2,%1 + %?.f\\t%0,%1,%2 + %?.f\\t%0,%1,%2" + [(set_attr "iscompact" "no,no,no,no,no,no,no") + (set_attr "predicable" "yes,no,no,no,no,no,no") + (set_attr "length" "4,4,4,4,4,8,8") + (set_attr "type" "")] + ) + +;; It may be worth to have a separate pattern for AND to take +;; advantage of TST_S instruction. 
+(define_insn "*_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (COMMUTATIVEF:GPI + (match_operand:GPI 0 "nonmemory_operand" "% r,U06S0,S32S0,r") + (match_operand:GPI 1 "nonmemory_operand" " rU06S0, r, r,S32S0")) + (const_int 0)))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + .f\\t0,%0,%1 + .f\\t0,%1,%0 + .f\\t0,%0,%1 + .f\\t0,%0,%1" + [(set_attr "iscompact" "no") + (set_attr "predicable" "no") + (set_attr "length" "4,4,8,8") + (set_attr "type" "")] + ) + +(define_insn "*sub_insn_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (minus:GPI (match_operand:GPI 1 "nonmemory_operand" " 0, r,U06S0,S12S0,S32S0,r") + (match_operand:GPI 2 "nonmemory_operand" "S12S0,rU06Sx, r, 0, r,S32S0")) + (const_int 0))) + (set ( match_operand:GPI 0 "register_operand" "= r, r, r, r, r,r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "@ + sub.f\\t%0,%1,%2 + sub%s2.f\\t%0,%1,%S2 + rsub.f\\t%0,%2,%1 + rsub.f\\t%0,%2,%1 + sub.f\\t%0,%1,%2 + sub.f\\t%0,%1,%2" + [(set_attr "iscompact" "no") + (set_attr "length" "4,4,4,4,8,8") + (set_attr "type" "sub")] + ) + +(define_insn "*sub_insn_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (minus:GPI (match_operand:GPI 0 "nonmemory_operand" " r,U06S0,S32S0,r") + (match_operand:GPI 1 "nonmemory_operand" "rU06Sx, r, r,S32S0")) + (const_int 0)))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + sub%s1.f\\t0,%0,%S1 + rsub.f\\t0,%1,%0 + sub.f\\t0,%0,%1 + sub.f\\t0,%0,%1" + [(set_attr "iscompact" "no") + (set_attr "length" "4,4,8,8") + (set_attr "type" "sub")] + ) + +(define_insn "*si2_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (ANY_EXTEND:SI (match_operand:SHORT 0 "register_operand" "r")) + (const_int 0)))] + "" + ".f\\t0,%0" + [(set_attr "type" "") + (set_attr "length" "4")]) + +(define_insn "*extenddi2_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (sign_extend:DI (match_operand:EXT 0 "register_operand" "r")) + (const_int 0)))] + "" + "sexl.f\\t0,%0" + [(set_attr "type" "sex") + (set_attr "length" "4")]) + +(define_insn "*si_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (ANY_EXTEND:SI (match_dup 1)))] + "" + ".f\\t%0,%1" + [(set_attr "type" "") + (set_attr "length" "4")]) + +(define_insn "*extenddi_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (sign_extend:DI (match_operand:EXT 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_dup 1)))] + "" + "sexl.f\\t%0,%1" + [(set_attr "type" "sex") + (set_attr "length" "4")]) + +(define_insn "*btst" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN (zero_extract:GPI + (match_operand:GPI 0 "register_operand" "q,r") + (const_int 1) + (match_operand 1 "const_int_operand" "U05S0,S12S0")) + (const_int 0)))] + "" + "btst%?\\t%0,%1" + [(set_attr "type" "btst") + (set_attr "length" "*,4") + (set_attr "iscompact" "maybe,no") + (set_attr "cost" "2,4")]) + +;; SI/DI DIV/REM instructions. 
+(define_expand "3" + [(set (match_operand:GPI 0 "register_operand") + (DIVREM:GPI (match_operand:GPI 1 "nonmemory_operand") + (match_operand:GPI 2 "nonmemory_operand")))] + "TARGET_ARC64_DIVREM" + { + if (mode == DImode + && !register_operand (operands[2], DImode)) + operands[2] = force_reg (DImode, operands[2]); + if (mode == DImode + && !register_operand (operands[1], DImode)) + operands[1] = force_reg (DImode, operands[1]); + } + ) + +(define_insn "*3" + [(set (match_operand:GPI 0 "register_operand" "=r, r, r, r,r, r, r") + (DIVREM:GPI (match_operand:GPI 1 "arc64_nonmem_operand" " 0, 0, 0, r,r,S32S0, r") + (match_operand:GPI 2 "arc64_nonmem_operand" " r,U06S0,S12S0,U06S0,r, r,S32S0")))] + "TARGET_ARC64_DIVREM + && (register_operand (operands[1], mode) + || register_operand (operands[2], mode))" + "%?\\t%0,%1,%2" + [(set_attr "length" "4,4,4,4,4,8,8") + (set_attr "type" "")]) + +(define_insn "*3_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (DIVREM:GPI + (match_operand:GPI 1 "arc64_nonmem_operand" " 0,r,S32S0, r") + (match_operand:GPI 2 "arc64_nonmem_operand" " r,r, r,S32S0")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r, r, r") + (DIVREM:GPI (match_dup 1) + (match_dup 2)))] + "TARGET_ARC64_DIVREM + && (register_operand (operands[1], mode) + || register_operand (operands[2], mode))" + ".f\\t%0,%1,%2" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "")]) + +(define_insn "*3_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (DIVREM:GPI + (match_operand:GPI 0 "arc64_nonmem_operand" "r,S32S0, r") + (match_operand:GPI 1 "arc64_nonmem_operand" "r, r,S32S0")) + (const_int 0)))] + "TARGET_ARC64_DIVREM + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + ".f\\t0,%0,%1" + [(set_attr "length" "4,8,8") + (set_attr "type" "")]) + +;; To be merged into adddi3 +(define_insn "*add_tls_off" + [(set (match_operand:P 0 "register_operand" "=r") + (plus:P (match_operand:P 1 "register_operand" "r") + (unspec:P [(match_operand 2 "" "")] + ARC64_UNSPEC_TLS_OFF)))] + "" + "add\\t%0,%1,%2@tpoff" + [(set_attr "type" "add") + (set_attr "length" "8")] + ) + +(define_insn "sub3_cmp" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r") + (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0"))) + (set (match_operand:GPI 0 "register_operand" "=r, r,r, r, r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "sub.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "sub")] + ) + +(define_insn "add3_cmp" + [(set (match_operand 3 "cc_register" "") + (match_operator 4 "cc_compare_operator" + [(plus:GPI + (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r") + (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0")) + (match_dup 1)])) + (set (match_operand:GPI 0 "register_operand" "=r, r,r, r, r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], mode) + || register_operand (operands[2], mode)" + "add.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "add")]) + +;; Extending this pattern to handle CCmode, we need to match GEU code +;; also. 
+(define_insn "add3_carry" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (plus:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0))))] + "" + "adc\\t%0,%1,%2" + [(set_attr "type" "adc") + (set_attr "length" "4")]) + +;; Extending this pattern to handle Cmode, we need to match GEU code +;; also. +(define_insn "sub3_carry" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))] + "" + "sbc\\t%0,%1,%2" + [(set_attr "type" "sbc") + (set_attr "length" "4")]) + +(define_expand "add3_Ccmp" + [(parallel + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:GPI + (match_operand:GPI 1 "arc64_nonmem_operand") + (match_operand:GPI 2 "arc64_nonmem_operand")) + (match_dup 1))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 1) (match_dup 2)))])] + "" + ) + +(define_expand "sub3_Ccmp" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "arc64_nonmem_operand") + (match_operand:GPI 2 "arc64_nonmem_operand"))) + (set (match_operand:GPI 0 "register_operand") + (minus:GPI (match_dup 1) (match_dup 2)))])] + "" + ) + +(define_expand "3" + [(set (match_operand:DBLI 0 "register_operand") + (ADDSUB:DBLI (match_operand:DBLI 1 "register_operand") + (match_operand:DBLI 2 "nonmemory_operand")))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + if (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2)) + { + high_dest = gen_highpart (mode, operands[0]); + low_dest = gen_lowpart (mode, operands[0]); + op1_high = gen_highpart (mode, operands[1]); + op1_low = gen_lowpart (mode, operands[1]); + op2_high = gen_highpart_mode (mode, mode, operands[2]); + op2_low = gen_lowpart (mode, operands[2]); + + emit_insn (gen_3_Ccmp (low_dest, op1_low, + force_reg (mode, op2_low))); + emit_insn (gen_3_carry (high_dest, op1_high, + force_reg (mode, op2_high))); + + DONE; + } + else if (!register_operand (operands[2], mode) + && !satisfies_constraint_S32S0 (operands[2])) + operands[2] = force_reg (mode, operands[2]); + +}) + +;; Shifted adds and subs +(define_insn "*add_shift" + [(set (match_operand:GPI 0 "register_operand" "=q,r,r,r") + (plus:GPI + (ashift:GPI (match_operand:GPI 1 "register_operand" "q,r,r,r") + (match_operand:GPI 2 "_1_2_3_operand" "")) + (match_operand:GPI 3 "arc64_regsym_operand" "0,0,r,S32S0SymMV")))] + "" + "add%2%?\\t%0,%3,%1" + [(set_attr "type" "add") + (set_attr "length" "*,4,4,8") + (set_attr "iscompact" "maybe,no,no,no")]) + +(define_insn "*addzsidi_shift" + [(set (match_operand:DI 0 "register_operand" "=q,r,r,r") + (zero_extend:DI + (plus:SI + (ashift:SI (match_operand:SI 1 "register_operand" "q,r,r,r") + (match_operand:SI 2 "_1_2_3_operand" "")) + (match_operand:SI 3 "arc64_regsym_operand" "0,0,r,S32S0SymMV"))))] + "" + "add%2%?\\t%0,%3,%1" + [(set_attr "type" "add") + (set_attr "length" "*,4,4,8") + (set_attr "iscompact" "yes,no,no,no")]) + +(define_insn "*addx_cmp0" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (plus:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r,r,r") + (match_operand:GPI 2 "_1_2_3_operand" "")) + (match_operand:GPI 3 "arc64_regsym_operand" "0,r,S32S0SymMV")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (plus:GPI (ashift:GPI (match_dup 1) (match_dup 2)) + (match_dup 3)))] + "" + "add%2%?.f\\t%0,%3,%1" + 
[(set_attr "type" "add") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")]) + +(define_insn "*addx_cmp0_noout" + [(set (reg:CC_ZN CC_REGNUM) + (compare:CC_ZN + (plus:GPI (ashift:GPI (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "_1_2_3_operand" "")) + (match_operand:GPI 2 "arc64_regsym_operand" "0,r,S32S0SymMV")) + (const_int 0)))] + "" + "add%1%?.f\\t0,%2,%1" + [(set_attr "type" "add") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")]) + +(define_insn "*sub_shift" + [(set (match_operand:GPI 0 "register_operand" "=r,r,r") + (minus:GPI (match_operand:GPI 1 "arc64_regsym_operand" "0,r,S32S0SymMV") + (ashift:GPI (match_operand:GPI 2 "register_operand" "r,r,r") + (match_operand:GPI 3 "_1_2_3_operand" ""))))] + "" + "sub%3\\t%0,%1,%2" + [(set_attr "type" "sub") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no")]) + +(define_insn "*add_mult" + [(set (match_operand:GPI 0 "register_operand" "=q,r,r") + (plus:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "q,r,r") + (match_operand:GPI 2 "_2_4_8_operand" "")) + (match_operand:GPI 3 "arc64_regsym_operand" "0,r,S32S0SymMV")))] + "" + "add%s2%?\\t%0,%3,%1" + [(set_attr "type" "add") + (set_attr "length" "*,4,8") + (set_attr "iscompact" "maybe,no,no")]) + +(define_insn "*add_mult2" + [(set (match_operand:GPI 0 "register_operand" "=q,r,r") + (plus:GPI + (match_operand:GPI 1 "nonmemory_operand" "0,r,S32S0") + (mult:GPI (match_operand:GPI 2 "register_operand" "q,r,r") + (match_operand:GPI 3 "_2_4_8_operand" ""))))] + "" + "add%s3%?\\t%0,%1,%2" + [(set_attr "type" "add") + (set_attr "length" "*,4,8") + (set_attr "iscompact" "maybe,no,no")]) + +;; Multiplications + +(define_expand "mulhisi3" + [(set (match_operand:SI 0 "register_operand") + (mult:SI (ANY_EXTEND:SI (match_operand:HI 1 "register_operand")) + (ANY_EXTEND:SI (match_operand:HI 2 "nonmemory_operand"))))] + "" + " + if (CONSTANT_P (operands[2])) + { + operands[2] = force_reg (HImode, operands[2]); + } + " + ) + +(define_insn "*mulhisi3r" + [(set (match_operand:SI 0 "register_operand" "=q,r,r") + (mult:SI (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "%0,0,r")) + (ANY_EXTEND:SI (match_operand:HI 2 "register_operand" "q,r,r"))))] + "" + "mpyw%?\\t%0,%1,%2" + [(set_attr "length" "*,4,4") + (set_attr "iscompact" "maybe,no,no") + (set_attr "type" "mpy") + (set_attr "predicable" "yes,yes,no") + ]) + +(define_insn "*mulhisi3rze" + [(set (match_operand:DI 0 "register_operand" "=q,r,r") + (zero_extend:DI + (mult:SI (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "%0,0,r")) + (ANY_EXTEND:SI (match_operand:HI 2 "register_operand" "q,r,r")))))] + "" + "mpyw%?\\t%0,%1,%2" + [(set_attr "length" "*,4,4") + (set_attr "iscompact" "maybe,no,no") + (set_attr "type" "mpy")]) + +(define_insn "mulhisi3i" + [(set (match_operand:SI 0 "register_operand" "=r, r, r, r,accrn,r") + (mult:SI + (sign_extend:SI + (match_operand:HI 1 "register_operand" "%0, r, 0, 0, r,r")) + (match_operand:HI 2 "short_immediate_operand" "U06S0,U06S0,S12S0,S16S0,S16S0,S16S0")))] + "" + "@ + mpyw%?\\t%0,%1,%2 + mpyw%?\\t%0,%1,%2 + mpyw%?\\t%0,%1,%2 + mpyw%?\\t%0,%1,%2 + dmpyh\\t0,%1,%2 + mpyw%?\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8,8,8") + (set_attr "type" "mpy") + (set_attr "predicable" "yes,no,no,yes,no,no")]) + +(define_insn "umulhisi3i" + [(set (match_operand:SI 0 "register_operand" "=r, r, r, r,accrn,r") + (mult:SI + (zero_extend:SI + (match_operand:HI 1 "register_operand" "%0, r, 0, 0, r,r")) + (match_operand:HI 2 
"unsign_immediate_operand" "U06S0,U06S0,U12S0,U16S0,U16S0,U16S0")))] + "" + "@ + mpyuw%?\\t%0,%1,%2 + mpyuw%?\\t%0,%1,%2 + mpyuw%?\\t%0,%1,%2 + mpyuw%?\\t%0,%1,%2 + dmpyhu\\t0,%1,%2 + mpyuw%?\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8,8,8") + (set_attr "type" "mpy") + (set_attr "predicable" "yes,no,no,yes,no,no")]) + +;faulty;(define_insn "mulhisi3ize" +;faulty; [(set (match_operand:DI 0 "register_operand" "=r, r, r,r,r") +;faulty; (zero_extend:DI +;faulty; (mult:SI (ANY_EXTEND:SI +;faulty; (match_operand:HI 1 "register_operand" "%0, r, 0,0,r")) +;faulty; (match_operand:HI 2 "immediate_operand" "U06S0,U06S0,S12S0,i,i"))))] +;faulty; "" +;faulty; "mpyw%?\\t%0,%1,%2" +;faulty; [(set_attr "length" "4,4,4,8,8") +;faulty; (set_attr "type" "mpy") +;faulty; (set_attr "predicable" "yes,no,no,yes,no")]) + +(define_insn "*mul3" + [(set (match_operand:GPI 0 "register_operand" "=q,q, r, r, r, r, accrn, r") + (mult:GPI (match_operand:GPI 1 "register_operand" "%0,q, 0, r, 0, 0, r, r") + (match_operand:GPI 2 "nonmemory_operand" "q,0,rU06S0,rU06S0,S12S0,S32S0,S32S0r,S32S0")))] + "" + "@ + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%2,%1 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpyd%?\\t0,%1,%2 + mpy%?\\t%0,%1,%2" + [(set_attr "length" "*,*,4,4,4,8,8,8") + (set_attr "iscompact" "maybe,maybe,no,no,no,no,no,no") + (set_attr "type" "mpy") + (set_attr "predicable" "no,no,yes,no,no,yes,no,no")]) + +(define_insn "*mulsi3ze" + [(set (match_operand:DI 0 "register_operand" "=q,q, r, r, r, r, r") + (zero_extend:DI + (mult:SI + (match_operand:SI 1 "register_operand" "%0,q, 0, r, 0, 0, r") + (match_operand:SI 2 "nonmemory_operand" "q,0,rU06S0,rU06S0,S12S0,S32S0,S32S0"))))] + "" + "@ + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%2,%1 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2 + mpy%?\\t%0,%1,%2" + [(set_attr "length" "*,*,4,4,4,8,8") + (set_attr "iscompact" "yes,yes,no,no,no,no,no") + (set_attr "type" "mpy") + (set_attr "predicable" "no,no,yes,no,no,yes,no")]) + +(define_insn "*mulsi3_cmp0" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (mult:SI + (match_operand:SI 1 "register_operand" "%r, 0,r") + (match_operand:SI 2 "nonmemory_operand" "rU06S0,S12S0,i")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r, r,r") + (mult:SI (match_dup 1) (match_dup 2)))] + "" + "mpy%?.f\\t%0,%1,%2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mpy")]) + +(define_insn "*mulsi3_cmp0_noout" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (mult:SI + (match_operand:SI 0 "register_operand" "% r, r,r") + (match_operand:SI 1 "nonmemory_operand" "rU06S0,S12S0,i")) + (const_int 0)))] + "" + "mpy%?.f\\t0,%0,%1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mpy")]) + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0,r")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r,r"))) + (const_int 32))))] + "" + "mpym%?\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "mpy") + (set_attr "predicable" "yes,no")]) + +(define_insn "muldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (truncate:DI + (lshiftrt:TI + (mult:TI + (ANY_EXTEND:TI (match_operand:DI 1 "register_operand" "%0,r")) + (ANY_EXTEND:TI (match_operand:DI 2 "register_operand" "r,r"))) + (const_int 64))))] + "TARGET_64BIT" + "mpyml%?\\t%0,%1,%2" + [(set_attr "type" "mpyl") + (set_attr "length" "4") + (set_attr "predicable" "yes,no")]) + +(define_expand 
"mulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand")) + (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))] + "TARGET_64BIT" +{ + rtx low = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (low, operands[1], operands[2])); + + rtx high = gen_reg_rtx (DImode); + emit_insn (gen_muldi3_highpart (high, operands[1], operands[2])); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low); + emit_move_insn (gen_highpart (DImode, operands[0]), high); + DONE; +}) + +(define_expand "usmulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand")) + (sign_extend:TI (match_operand:DI 2 "register_operand"))))] + "TARGET_64BIT" +{ + rtx low = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (low, operands[1], operands[2])); + + rtx high = gen_reg_rtx (DImode); + emit_insn (gen_usmuldi3_highpart (high, operands[1], operands[2])); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low); + emit_move_insn (gen_highpart (DImode, operands[0]), high); + DONE; +}) + +(define_insn "usmuldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "r")) + (sign_extend:TI + (match_operand:DI 2 "register_operand" " r"))) + (const_int 64))))] + "" + "mpymsul\t%0,%2,%1" + [(set_attr "type" "mpyl") + (set_attr "length" "4")]) + + +;; 32 x 32 -> 64 (signed/unsigned) Triggers FAIL: c-c++-common/torture/builtin-arith-overflow-12.c +(define_expand "mulsidi3" + [(parallel [(set (match_operand:DI 0 "register_operand") + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 2 "nonmemory_operand")))) + (clobber (reg:DI R58_REGNUM))])] + "TARGET_SIMD" + " + if (CONSTANT_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + ") + +(define_insn "*mpyd" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + "mpyd\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "mpy")]) + +(define_insn "*mpyd0" + [(set (reg:DI R58_REGNUM) + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 0 "register_operand" "r")) + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r"))))] + "TARGET_SIMD" + "mpyd\\t0,%0,%1" + [(set_attr "length" "4") + (set_attr "type" "mpy")]) + +(define_insn "*mpydi" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r,0,r")) + (match_operand:SI 2 "signed32b_operand" "U06S0,S12S0,i"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + "mpyd\\t%0,%1,%2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mpy")]) + +;; 16bit operations using SIMD instructions +;; This gives worst code, keep it here for any other ideas. 
+;; exth -> add -> exth/extb :: add -> exth/extb +;;(define_insn "hi3" +;; [(set (match_operand:HI 0 "register_operand" "=r,r") +;; (ADDSUB:HI +;; (match_operand:HI 1 "register_operand" "r,r") +;; (match_operand:HI 2 "nonmemory_operand" "r,i")))] +;; "TARGET_SIMD" +;; "@ +;; v2h\\t%0,%1,%2 +;; v2h\\t%0,%1,%2@u32" +;; [(set_attr "length" "4,8") +;; (set_attr "type" "v")]) + + +;; MADD patterns +;; 32 + (signe) 16 x (signe) 16 -> 32 +(define_expand "maddhisi4" + [(set (match_operand: SI 0 "register_operand") + (plus:SI + (mult:SI + (ANY_EXTEND:SI (match_operand:HI 1 "register_operand")) + (ANY_EXTEND:SI (match_operand:HI 2 "register_operand"))) + (match_operand:SI 3 "register_operand")))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx acc = gen_rtx_REG (SImode, R58_REGNUM); + + emit_move_insn (acc, operands[3]); + emit_insn (gen_machi (operands[0], operands[1], + operands[2], acc)); + DONE; + }) + +(define_insn "machi0" + [(set (reg:SI R58_REGNUM) + (plus:SI + (mult:SI + (ANY_EXTEND:SI (match_operand:HI 0 "register_operand" "%r,r")) + (ANY_EXTEND:SI (match_operand:HI 1 "nonmemory_operand" "rU06S0,i"))) + (reg:SI R58_REGNUM)))] + "TARGET_SIMD && TARGET_64BIT" + "vmac2h\\t0,%0,%1" + [(set_attr "length" "4,8") + (set_attr "type" "mac")]) + +;; The second move instruction can be remove, however, we need to add +;; a step that recognizes implicit accumulator reads and writes. +(define_insn_and_split "machi" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (plus:SI + (mult:SI + (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "%r, 0,r,r")) + (ANY_EXTEND:SI (match_operand:HI 2 "nonmemory_operand" "rU06S0,S12S0,i,*ri"))) + (match_operand:SI 3 "register_operand" "accum,accum,accum,*r"))) + (clobber (reg:SI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "@ + vmac2h\\t%0,%1,%2 + vmac2h\\t%0,%1,%2 + vmac2h\\t%0,%1,%2 + #" + "&& reload_completed && (REGNO (operands[3]) != R58_REGNUM)" + [(set (reg:SI R58_REGNUM) (match_dup 3)) + (set (reg:SI R58_REGNUM) + (plus:SI (mult:SI (ANY_EXTEND:SI (match_dup 1)) + (ANY_EXTEND:SI (match_dup 2))) + (reg:SI R58_REGNUM))) + (set (match_dup 0) (reg:SI R58_REGNUM))] + "" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "mac")]) + +;; 64 + (signe) 32 x (signe) 32 -> 64 +(define_expand "maddsidi4" + [(set (match_operand: DI 0 "register_operand") + (plus:DI + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand"))) + (match_operand:DI 3 "register_operand")))] + "TARGET_SIMD" + { + rtx acc = gen_rtx_REG (DImode, R58_REGNUM); + + emit_move_insn (acc, operands[3]); + emit_insn (gen_macd (operands[0], operands[1], + operands[2], acc)); + DONE; + }) + +(define_insn "macd0" + [(set (reg:DI R58_REGNUM) + (plus:DI + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 0 "register_operand" "%r,r")) + (ANY_EXTEND:DI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i"))) + (reg:DI R58_REGNUM)))] + "TARGET_SIMD" + "macd\\t0,%0,%1" + [(set_attr "length" "4,8") + (set_attr "type" "mac")]) + +(define_insn_and_split "macd" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (plus:DI + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%r,0,r,r")) + (ANY_EXTEND:DI (match_operand:SI 2 "nonmemory_operand" "rU06S0,S12S0,i,*ri"))) + (match_operand:DI 3 "register_operand" "accum,accum,accum,*r"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + "@ + macd\\t%0,%1,%2 + macd\\t%0,%1,%2 + macd\\t%0,%1,%2 + #" + "&& reload_completed && (REGNO (operands[3]) != R58_REGNUM)" + [(set (reg:DI R58_REGNUM) 
(match_dup 3)) + (parallel + [(set (match_dup 0) + (plus:DI (mult:DI (ANY_EXTEND:DI (match_dup 1)) + (ANY_EXTEND:DI (match_dup 2))) + (reg:DI R58_REGNUM))) + (clobber (reg:DI R58_REGNUM))])] + "" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "mac")]) + +;; This is a combiner pattern: we need to split it into 3 instructions. +;; The second move is propagated to following instructions by +;; cprop_hardreg. Unfortunately, I cannot use a second peephole +;; pattern for merging the leftovers from cprop_hardreg back into the +;; mac instruction, as there is no peephole step following it; thus, we +;; use ARC's machine-specific reorg step to merge back into the MAC +;; instruction those MOV instructions which were not propagated by the +;; cprop_hardreg step. + +(define_insn_and_split "macsi" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (match_operand:SI 2 "nonmemory_operand" "rU06S0,i,*ri")) + (match_operand:SI 3 "nonmemory_operand" "accum,accum,*ri"))) + (clobber (reg:SI R58_REGNUM))] + "TARGET_SIMD" + "@ + mac\\t%0,%1,%2 + mac\\t%0,%1,%2 + #" + "&& reload_completed && (REGNO (operands[3]) != R58_REGNUM)" + [(set (reg:SI R58_REGNUM) (match_dup 3)) + (set (reg:SI R58_REGNUM) + (plus:SI (mult:SI (match_dup 1) (match_dup 2)) (reg:SI R58_REGNUM))) + (set (match_dup 0) (reg:SI R58_REGNUM))] + "" + [(set_attr "length" "4,8,8") + (set_attr "type" "mac")]) + +(define_insn "macsi0" + [(set (reg:SI R58_REGNUM) + (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "%r,r") + (match_operand:SI 1 "nonmemory_operand" "rU06S0,i")) + (reg:SI R58_REGNUM)))] + "TARGET_SIMD" + "mac\\t0,%0,%1" + [(set_attr "length" "4,8") + (set_attr "type" "mac")]) + +;; Try to propagate the first move into adjacent previous instructions. +;; N.B.
Probably we need to make a more complex step to take care of +;; this operation when we schedule +(define_peephole2 + [(set (match_operand:HI_SI 0 "register_operand" "") + (ARITH:HI_SI (match_operand:HI_SI 1 "register_operand" "") + (match_operand:HI_SI 2 "nonmemory_operand" ""))) + (set (reg:HI_SI R58_REGNUM) (match_dup 0))] + "peep2_reg_dead_p (2, operands[0])" + [(set (reg:HI_SI R58_REGNUM) (ARITH:HI_SI (match_dup 1) (match_dup 2)))]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "")) + (ANY_EXTEND:SI (match_operand:HI 2 "register_operand" "")))) + (set (reg:SI R58_REGNUM) (match_dup 0))] + "peep2_reg_dead_p (2, operands[0])" + [(set (reg:SI R58_REGNUM) + (mult:SI (ANY_EXTEND:SI (match_dup 1)) (ANY_EXTEND:SI (match_dup 2))))]) + +(define_peephole2 + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "")))) + (clobber (reg:DI R58_REGNUM))]) + (set (reg:DI R58_REGNUM) (match_dup 0))] + "peep2_reg_dead_p (2, operands[0])" + [(set (reg:DI R58_REGNUM) + (mult:DI (ANY_EXTEND:DI (match_dup 1)) (ANY_EXTEND:DI (match_dup 2))))]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "")) + (match_operand 2 "immediate_operand" ""))) + (set (reg:SI R58_REGNUM) (match_dup 0))] + "peep2_reg_dead_p (2, operands[0])" + [(set (reg:SI R58_REGNUM) + (mult:SI (ANY_EXTEND:SI (match_dup 1)) (match_dup 2)))]) + +;; Propagate r58 to arithmetic operation when dealing with zero extension +(define_peephole2 + [(set (match_operand:HI 0 "register_operand") + (ADDSUB:HI (match_operand:HI 1 "register_operand") + (match_operand:HI 2 "nonmemory_operand"))) + (set (reg:SI R58_REGNUM) (match_operand:SI 3 "register_operand"))] + "peep2_reg_dead_p (2, operands[0]) + && (REGNO (operands[3]) == REGNO (operands[0]))" + [(set (reg:HI R58_REGNUM) (ADDSUB:HI (match_dup 1) (match_dup 2)))]) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "")) + (match_operand 2 "immediate_operand" ""))) + (set (reg:HI R58_REGNUM) (match_operand:HI 3 "register_operand"))] + "peep2_reg_dead_p (2, operands[0]) + && (REGNO (operands[3]) == REGNO (operands[0]))" + [(set (reg:SI R58_REGNUM) + (mult:SI (ANY_EXTEND:SI (match_dup 1)) (match_dup 2)))]) + +;; Another combiner pattern (observed in rgbyiq01) +(define_insn_and_split "dmpywhu" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "unsign_immediate_operand" "i")) + (mult:SI (match_operand:SI 3 "register_operand" "r") + (match_operand 4 "unsign_immediate_operand" "i")))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 5) (match_dup 2)] + ARC64_UNSPEC_DMPYWHU)) + (clobber (reg:DI R58_REGNUM))])] + { + operands[5] = gen_lowpart (DImode, operands[0]); + emit_insn (gen_pack2silo (operands[5], operands[3], operands[1])); + operands[2] = GEN_INT ((INTVAL (operands[2]) << 16) + INTVAL (operands[4])); + } + [(set_attr "length" "8") + (set_attr "type" "dmpywh")]) + +(define_insn "dmpywhu0" + [(set (match_operand:SI 0 "register_operand" "=accum,r") + (unspec:SI [(match_operand:DI 1 "register_operand" "r,r") + (match_operand 2 
"immediate_operand" "i,i")] + ARC64_UNSPEC_DMPYWHU)) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "@ + dmpywhu\\t0,%1,%2@u32 + dmpywhu\\t%0,%1,%2@u32" + [(set_attr "length" "8") + (set_attr "type" "dmpywh")]) + +(define_insn_and_split "dmpywh" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "short_immediate_operand" "i")) + (mult:SI (match_operand:SI 3"register_operand" "r") + (match_operand 4 "short_immediate_operand" "i")))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 5) (match_dup 2)] + ARC64_UNSPEC_DMPYWH)) + (clobber (reg:SI R58_REGNUM))])] + { + operands[5] = gen_lowpart (DImode, operands[0]); + emit_insn (gen_pack2silo (operands[5], operands[3], operands[1])); + operands[2] = GEN_INT ((INTVAL (operands[2]) << 16) + + (INTVAL (operands[4]) & 0xffff)); + } + [(set_attr "length" "8") + (set_attr "type" "dmpywh")]) + +(define_insn "dmpywh0" + [(set (match_operand:SI 2 "register_operand" "=accum,r") + (unspec:SI [(match_operand:DI 0 "register_operand" "r,r") + (match_operand 1 "immediate_operand" "i,i")] + ARC64_UNSPEC_DMPYWH)) + (clobber (reg:SI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "@ + dmpywh\\t0,%0,%1@u32 + dmpywh\\t%2,%0,%1@u32" + [(set_attr "length" "8") + (set_attr "type" "dmpywh")]) + +(define_insn "*mpywhu" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (mult:SI + (zero_extend:SI (match_operand:HI 1 "register_operand" "r,r")) + (match_operand:SI 2 "arc64_reg_or_unsig_operand" "r,i"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "dmpywhu\\t%0,%2,%1" + [(set_attr "length" "4,8") + (set_attr "type" "dmpywh") + ]) + +(define_insn "*mpywh" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (mult:SI + (sign_extend:SI (match_operand:HI 1 "register_operand" "r,r")) + (match_operand:SI 2 "arc64_nonmem_operand" "r,i"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "dmpywh\\t%0,%2,%1" + [(set_attr "length" "4,8") + (set_attr "type" "dmpywh")]) + +;; dmach combine pattern used to implement 16b MAC patterns. Extra +;; care needs to be taken when dealing with immediates which needs to +;; set the higher 16b to zero. I.e. we cannot use safely U6 or S12 +;; instruction variants. +(define_insn_and_split "dmach" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (plus:HI + (mult:HI (match_operand:HI 1 "register_operand" "%r,r,r") + (match_operand:HI 2 "nonmemory_operand" "r,i,*ri")) + (match_operand:HI 3 "nonmemory_operand" "accum,accum,*ri"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + "@ + dmach\\t%0,%1,%2 + dmach\\t%0,%1,%V2@u32 + #" + "&& reload_completed + && (CONST_INT_P (operands[3]) || (REGNO (operands[3]) != R58_REGNUM))" + [(set (reg:HI R58_REGNUM) (match_dup 3)) + (set (reg:HI R58_REGNUM) + (plus:HI (mult:HI (match_dup 1) (match_dup 2)) (reg:HI R58_REGNUM))) + (set (match_dup 0) (reg:HI R58_REGNUM))] + "" + [(set_attr "length" "4,8,8") + (set_attr "type" "mac")]) + +(define_insn "dmach0" + [(set (reg:HI R58_REGNUM) + (plus:HI (mult:HI (match_operand:HI 0 "register_operand" "%r,r") + (match_operand:HI 1 "nonmemory_operand" "r,i")) + (reg:HI R58_REGNUM)))] + "TARGET_SIMD" + "@ + dmach\\t0,%0,%1 + dmach\\t0,%0,%V1@u32" + [(set_attr "length" "4,8") + (set_attr "type" "mac")]) + +;; macwh combine pattern +;; FIXME! 
maybe we should use r58 as an intermediate result holder to +;; enable linking (back-to-back) with other MAC instructions, but I +;; haven't seen any example. +(define_insn_and_split "dmacwh" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "short_immediate_operand" "i")) + (mult:SI (match_operand:SI 3 "register_operand" "r") + (match_operand 4 "short_immediate_operand" "i"))) + (match_operand:SI 5 "nonmemory_operand" "ri"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 6) (match_dup 2) (reg:SI R58_REGNUM)] + ARC64_UNSPEC_DMACWH))] + { + emit_move_insn (gen_rtx_REG (SImode, R58_REGNUM), operands[5]); + operands[6] = gen_lowpart (DImode, operands[0]); + emit_insn (gen_pack2silo (operands[6], operands[3], operands[1])); + operands[2] = GEN_INT ((INTVAL (operands[2]) << 16) + + (INTVAL (operands[4]) & 0xffff)); + } + [(set_attr "length" "8") + (set_attr "type" "mac")]) + +(define_insn "pack2silo" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")] + ARC64_UNSPEC_VPACK2WL))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "dmacwh0" + [(set (match_operand:SI 0 "register_operand" "=accum,r") + (unspec:SI [(match_operand:DI 1 "register_operand" "r,r") + (match_operand 2 "immediate_operand" "i,i") + (reg:SI R58_REGNUM)] + ARC64_UNSPEC_DMACWH))] + "TARGET_SIMD" + "@ + dmacwh\\t0,%1,%2@u32 + dmacwh\\t%0,%1,%2@u32" + [(set_attr "length" "8") + (set_attr "type" "mac")]) + +;; FIXME! maybe we should use r58 as an intermediate result holder to +;; enable linking (back-to-back) with other MAC instructions, but I +;; haven't seen any example.
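For reference, the splitters above (and "dmacwhu" below) fold the two 16-bit multiplier immediates into the single 32-bit @u32 operand of the dual multiply: operand 2 ends up in the upper half-word and operand 4 in the lower one. A small stand-alone sketch of that packing arithmetic (illustrative only; the helper name is not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* Pack two 16-bit multiplier immediates the way the splitters do:
       the first immediate lands in the upper half-word, the second in
       the lower half-word (masked, since it may be sign-extended).  */
    static uint32_t
    pack_dmpywh_limm (int32_t imm_hi, int32_t imm_lo)
    {
      return ((uint32_t) imm_hi << 16) + ((uint32_t) imm_lo & 0xffff);
    }

    int
    main (void)
    {
      /* e.g. immediates 3 and -2 become the limm 0x0003fffe.  */
      printf ("0x%08x\n", (unsigned) pack_dmpywh_limm (3, -2));
      return 0;
    }

The unsigned variants (dmpywhu/dmacwhu) can skip the & 0xffff mask because unsign_immediate_operand already restricts their operands to 0..0xffff.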
+(define_insn_and_split "dmacwhu" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "unsign_immediate_operand" "i")) + (mult:SI (match_operand:SI 3 "register_operand" "r") + (match_operand 4 "unsign_immediate_operand" "i"))) + (match_operand:SI 5 "nonmemory_operand" "ri"))) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 6) (match_dup 2) (reg:SI R58_REGNUM)] + ARC64_UNSPEC_DMACWHU))] + { + emit_move_insn (gen_rtx_REG (SImode, R58_REGNUM), operands[5]); + operands[6] = gen_lowpart (DImode, operands[0]); + emit_insn (gen_pack2silo (operands[6], operands[3], operands[1])); + operands[2] = GEN_INT ((INTVAL (operands[2]) << 16) + INTVAL (operands[4])); + } + [(set_attr "length" "8") + (set_attr "type" "mac")]) + +(define_insn "dmacwhu0" + [(set (match_operand:SI 0 "register_operand" "=accum,r") + (unspec:SI [(match_operand:DI 1 "register_operand" "r,r") + (match_operand 2 "immediate_operand" "i,i") + (reg:SI R58_REGNUM)] + ARC64_UNSPEC_DMACWHU))] + "TARGET_SIMD" + "@ + dmacwhu\\t0,%1,%2@u32 + dmacwhu\\t%0,%1,%2@u32" + [(set_attr "length" "8") + (set_attr "type" "mac")]) + +(define_insn "*vpack2hl_scalar" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI + (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int 65535)) + (ashift:SI (match_operand:SI 2 "register_operand" "r") + (const_int 16))))] + "TARGET_SIMD" + "vpack2hl\\t%0,%2,%1" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +(define_insn "*vpack2wl_scalar" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (const_int 32)) + (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%2,%1" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +;; ------------------------------------------------------------------- +;; Integer SIMD instructions +;; ------------------------------------------------------------------- + +(define_expand "mov" + [(set (match_operand:VALL 0 "nonimmediate_operand") + (match_operand:VALL 1 "general_operand"))] + "TARGET_SIMD" + " + if (arc64_prepare_move_operands (operands[0], operands[1], mode)) + DONE; + ") + +(define_expand "movmisalign" + [(set (match_operand:VALL 0 "nonimmediate_operand") + (match_operand:VALL 1 "general_operand"))] + "TARGET_SIMD && !STRICT_ALIGNMENT" + " + if (arc64_prepare_move_operands (operands[0], operands[1], mode)) + DONE; + ") + +(define_insn "*mov_insn" + [(set (match_operand:VALL 0 "arc64_dest_operand" "=r,r,Ustor") + (match_operand:VALL 1 "nonimmediate_operand" "r,m,r"))] + "TARGET_SIMD && TARGET_64BIT + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + "@ + mov\\t%0,%1 + ld%U1\\t%0,%1 + st%U0\\t%1,%0" + [(set_attr "type" "move,ld,st")]) + +(define_insn "arc64_vpack_v4hihi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (unspec:V4HI [(match_operand:HI 1 "register_operand" "r") + (match_operand:HI 2 "register_operand" "r")] + ARC64_UNSPEC_VPACK4HL))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_vpack_v2sisi" + [(set (match_operand:V2SI 0 "register_operand" "=r, r,r,r") + (vec_concat:V2SI + (match_operand:SI 1 "register_operand" " r, 0,r,r") + (match_operand:SI 2 "nonmemory_operand" 
"U06S0,S12S0,r,S32S0")))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "vpack")]) + +(define_expand "vec_init" + [(match_operand:V64I 0 "register_operand") + (match_operand 1 "")] + "TARGET_SIMD && TARGET_64BIT" + { + arc64_expand_vector_init (operands[0], operands[1]); + DONE; + }) + +(define_insn "3" + [(set (match_operand:VALL 0 "register_operand" "=r") + (ADDSUB:VALL (match_operand:VALL 1 "register_operand" "r") + (match_operand:VALL 2 "register_operand" "r")))] + "TARGET_SIMD" + "v\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "v")]) + +;; Add with duplicate input. +(define_insn "*add3_dup" + [(set (match_operand:VALL 0 "register_operand" "=r,r") + (plus:VALL + (vec_duplicate:VALL + (match_operand 1 "vectdup_immediate_operand" "S06S0,S12S0")) + (match_operand:VALL 2 "register_operand" "r,0")))] + "TARGET_SIMD" + "vadd\\t%0,%2,%1" + [(set_attr "length" "4") + (set_attr "type" "vadd")]) + +(define_insn "neg2" + [(set (match_operand:VALL 0 "register_operand" "=r") + (neg:VALL (match_operand:VALL 1 "register_operand" "r")))] + "TARGET_SIMD" + "vsub\\t%0,0,%1" + [(set_attr "length" "8") + (set_attr "type" "vsub")]) + +(define_expand "vec_widen_mult_lo_v4hi" + [(match_operand:V2SI 0 "register_operand") + (ANY_EXTEND:V2SI (match_operand:V4HI 1 "register_operand")) + (ANY_EXTEND:V2SI (match_operand:V4HI 2 "register_operand"))] + "TARGET_SIMD" + { + emit_insn (gen_arc64_vmpy2h (operands[0], + operands[1], + operands[2])); + DONE; + }) + +(define_expand "vec_widen_mult_hi_v4hi" + [(match_operand:V2SI 0 "register_operand") + (ANY_EXTEND:V2SI (match_operand:V4HI 1 "register_operand")) + (ANY_EXTEND:V2SI (match_operand:V4HI 2 "register_operand"))] + "TARGET_SIMD" + { + rtx tmp1; + rtx tmp2; + if (TARGET_64BIT) + { + tmp1 = gen_reg_rtx (V4HImode); + tmp2 = gen_reg_rtx (V4HImode); + emit_insn (gen_arc64_swapl (tmp1, operands[1])); + emit_insn (gen_arc64_swapl (tmp2, operands[2])); + emit_insn (gen_arc64_vmpy2h (operands[0], tmp1, tmp2)); + } + else + { + tmp1 = operands[1]; + tmp2 = operands[2]; + emit_insn (gen_arc32_vmpy2h_hi (operands[0], tmp1, tmp2)); + } + DONE; + }) + + (define_insn "arc64_vmpy2h" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (mult:V2SI + (ANY_EXTEND:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)]))) + (ANY_EXTEND:V2SI + (vec_select:V2HI + (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)]))))) + (clobber (reg:V2SI R58_REGNUM))] + "TARGET_SIMD" + "vmpy2h\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vmpy2h")]) + +(define_insn "arc64_swapl" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)])) + (vec_select:V2HI (match_dup 1) (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_SIMD && TARGET_64BIT" + "swapl\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "swapl")]) + +(define_expand "dot_prodv4hi" + [(match_operand:V2SI 0 "register_operand") + (ANY_EXTEND:V2SI (match_operand:V4HI 1 "register_operand")) + (ANY_EXTEND:V2SI (match_operand:V4HI 2 "register_operand")) + (match_operand:V2SI 3 "register_operand")] + "TARGET_SIMD" +{ + rtx acc_reg = gen_rtx_REG (V2SImode, R58_REGNUM); + + emit_move_insn (acc_reg, operands[3]); + emit_insn (gen_arc64_vmach_zero (operands[1], operands[2])); + if (TARGET_64BIT) + { + rtx op1_high = gen_reg_rtx 
(V4HImode); + rtx op2_high = gen_reg_rtx (V4HImode); + emit_insn (gen_arc64_swapl (op1_high, operands[1])); + emit_insn (gen_arc64_swapl (op2_high, operands[2])); + emit_insn (gen_arc64_vmach (operands[0], op1_high, op2_high)); + } + else + { + emit_insn (gen_arc32_vmach_hi (operands[0], operands[1], operands[2])); + } + DONE; +}) + +(define_insn "arc64_vmach" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (plus:V2SI + (mult:V2SI + (ANY_EXTEND:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)]))) + (ANY_EXTEND:V2SI + (vec_select:V2HI (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)])))) + (reg:V2SI R58_REGNUM))) + (clobber (reg:V2SI R58_REGNUM))] + "TARGET_SIMD" + "vmac2h%?\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vmac2h")]) + +(define_insn "arc64_vmach_zero" + [(set (reg:V2SI R58_REGNUM) + (plus:V2SI + (mult:V2SI + (ANY_EXTEND:V2SI + (vec_select:V2HI (match_operand:V4HI 0 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)]))) + (ANY_EXTEND:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)])))) + (reg:V2SI R58_REGNUM)))] + "TARGET_SIMD" + "vmac2h%?\\t0,%0,%1" + [(set_attr "length" "4") + (set_attr "type" "vmac2h")]) + +;; FIXME! for v2hi -> dmpyh +(define_expand "reduc_plus_scal_v4hi" + [(parallel + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:V4HI 1 "register_operand" "r")] + ARC64_UNSPEC_QMPYH)) + (clobber (reg:DI R58_REGNUM))])] + "TARGET_SIMD" + "") + +(define_insn_and_split "*reduc_v4hi" + [(set (match_operand:HI 0 "register_operand" "=accum,r") + (unspec:HI [(match_operand:V4HI 1 "register_operand" "r,r")] + ARC64_UNSPEC_QMPYH)) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + "qmpyh\\t%0,%1,1" + "&& reload_completed && !TARGET_64BIT && (REGNO (operands[0]) != R58_REGNUM)" + [(parallel + [(set (reg:HI R58_REGNUM) + (unspec:HI [(match_dup 1)] ARC64_UNSPEC_QMPYH)) + (clobber (reg:DI R58_REGNUM))]) + (set (match_dup 0) (reg:HI R58_REGNUM))] + "" + [(set_attr "length" "8,4") + (set_attr "type" "qmpyh")]) + +(define_insn "reduc_plus_scal_v2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2SI 1 "register_operand" "r")] + ARC64_UNSPEC_DMPYWH)) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD && TARGET_64BIT" + "dmpywh\\t%0,%1,1" + [(set_attr "length" "4") + (set_attr "type" "dmpywh")]) + +;; FIXME! 
for v2hi -> dmach +(define_expand "fold_left_plus_v4hi" + [(set (match_operand:HI 0 "register_operand") + (unspec:HI [(match_operand:HI 1 "register_operand") + (match_operand:V4HI 2 "register_operand")] + ARC64_UNSPEC_QMACH)) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + { + rtx acc_reg = gen_rtx_REG (HImode, R58_REGNUM); + emit_move_insn (acc_reg, operands[1]); + operands[1] = acc_reg; + }) + +(define_insn "*qmach" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(reg:HI R58_REGNUM) + (match_operand:V4HI 1 "register_operand" "r")] + ARC64_UNSPEC_QMACH)) + (clobber (reg:DI R58_REGNUM))] + "TARGET_SIMD" + "qmach\\t%0,%1,1" + [(set_attr "length" "4") + (set_attr "type" "qmach")]) + +(define_expand "mulv2hi3" + [(set (match_operand:V2HI 0 "register_operand") + (mult:V2HI (match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmp = gen_reg_rtx (V2SImode); + emit_insn (gen_arc64_svmpy2h_lo (tmp, operands[1], operands[2])); + emit_insn (gen_arc64_packv2hi_lo (operands[0], tmp)); + DONE; + }) + +(define_insn "arc64_packv2hi_lo" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2SI 1 "register_operand" "r") + (const_int 0)] + ARC64_UNSPEC_VPACK4HL))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hl\\t%0,%1,0" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_expand "mulv2hi3_highpart" + [(match_operand:V2HI 0 "register_operand") + (ANY_EXTEND:SI (match_operand:V2HI 1 "register_operand")) + (ANY_EXTEND:SI (match_operand:V2HI 2 "register_operand"))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmp = gen_reg_rtx (V2SImode); + emit_insn (gen_arc64_vmpy2h_lo (tmp, operands[1], operands[2])); + emit_insn (gen_arc64_packv2hi_hi (operands[0], tmp)); + DONE; + }) + +(define_insn "arc64_packv2hi_hi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI [(match_operand:V2SI 1 "register_operand" "r") + (const_int 1)] + ARC64_UNSPEC_VPACK4HM))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hm\\t%0,%1,0" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + + (define_insn "arc64_vmpy2h_lo" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (mult:V2SI + (ANY_EXTEND:V2SI + (match_operand:V2HI 1 "register_operand" "r")) + (ANY_EXTEND:V2SI + (match_operand:V2HI 2 "register_operand" "r")))) + (clobber (reg:V2SI R58_REGNUM))] + "TARGET_SIMD" + "vmpy2h\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vmpy2h")]) + +(define_expand "mulv4hi3" + [(match_operand:V4HI 0 "register_operand") + (match_operand:V4HI 1 "register_operand") + (match_operand:V4HI 2 "register_operand")] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmpA = gen_reg_rtx (V2SImode); + rtx tmpB = gen_reg_rtx (V2SImode); + rtx tmp1 = gen_reg_rtx (V4HImode); + rtx tmp2 = gen_reg_rtx (V4HImode); + + emit_insn (gen_arc64_swapl (tmp1, operands[1])); + emit_insn (gen_arc64_swapl (tmp2, operands[2])); + emit_insn (gen_arc64_svmpy2h (tmpA, operands[1], operands[2])); + emit_insn (gen_arc64_svmpy2h (tmpB, tmp1, tmp2)); + emit_insn (gen_arc64_pack4hi (operands[0], tmpA, tmpB)); + DONE; + }) + +(define_insn "arc64_pack4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (truncate:V2HI + (match_operand:V2SI 1 "register_operand" "r")) + (truncate:V2HI + (match_operand:V2SI 2 "register_operand" "r"))))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "bswap2" + [(set (match_operand:VALL 0 
"register_operand" "=r") + (bswap:VALL (match_operand:VALL 1 "register_operand" "r")))] + "TARGET_SIMD" + "swape\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "swap")]) + +(define_insn "vec_extract" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: (match_operand:VALL 1 "register_operand" "r") + (parallel [(match_operand:SI 2 "const_int_operand" "n")])))] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = INTVAL (operands[2]); + gcc_assert (elem < 4); + elem = ((( - 1) & ) << ) + | ((elem * ) & ); + operands[2] = GEN_INT (elem); + return "xbfu\\t%0,%1,%2"; + } + [(set_attr "length" "8") + (set_attr "type" "xbfu")]) + +;; Alternative +;; emit_insn (gen_arc64_swap (tmpA, operands[1])); swap tmpA op1 +;; emit_insn (gen_arc64_sel_lo (tmpB, operands[1])); bmask tmpB,15 +;; emit_insn (gen_arc64_pack2si (operands[0], tmpB, tmpA)); vpack4hl op0,tmpB,tmpA +(define_expand "vec_unpacku_lo_v4hi" + [(set (match_operand:V2SI 0 "register_operand") + (zero_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand") + (parallel [(const_int 0)(const_int 1)]))))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmpA = gen_reg_rtx (HImode); + rtx tmpB = gen_reg_rtx (HImode); + + emit_insn (gen_vec_extractv4hi (tmpA, operands[1], GEN_INT (0))); + emit_insn (gen_vec_extractv4hi (tmpB, operands[1], GEN_INT (1))); + emit_insn (gen_arc64_vec_concat (operands[0], tmpA, tmpB)); + DONE; + }) + +;; Alternative +;; emit_insn (gen_arc64_swapl (tmp0, operands[1])); +;; emit_insn (gen_arc64_swap (tmpA, tmp0)); +;; emit_insn (gen_arc64_sel_lo (tmpB, tmp0)); +;; emit_insn (gen_arc64_pack2si (operands[0], tmpB, tmpA)); +(define_expand "vec_unpacku_hi_v4hi" + [(set (match_operand:V2SI 0 "register_operand") + (zero_extend:V2SI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand") + (parallel [(const_int 2)(const_int 3)]))))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmpA = gen_reg_rtx (HImode); + rtx tmpB = gen_reg_rtx (HImode); + + emit_insn (gen_vec_extractv4hi (tmpA, operands[1], GEN_INT (2))); + emit_insn (gen_vec_extractv4hi (tmpB, operands[1], GEN_INT (3))); + emit_insn (gen_arc64_vec_concat (operands[0], tmpA, tmpB)); + DONE; + }) + +(define_insn "arc64_vec_concat" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (unspec:V2SI [(match_operand:HI 1 "register_operand" "r") + (match_operand:HI 2 "register_operand" "r")] + ARC64_UNSPEC_VPACK2WL))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_expand "vec_duplicatev4hi" + [(set (match_operand:V4HI 0 "register_operand") + (vec_duplicate:V4HI (match_operand:HI 1 "register_operand")))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmp = gen_reg_rtx (V2SImode); + emit_insn (gen_arc64_duplicate_v2hi(tmp, operands[1])); + emit_insn (gen_arc64_pack4hi(operands[0], tmp, tmp)); + DONE; + }) + +(define_insn "arc64_duplicate_v2hi" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (unspec:V2SI [(match_operand:HI 1 "register_operand" "r") + (const_int 0)] + ARC64_UNSPEC_VPACK4HL))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hl\\t%0,%1,%1" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "vec_duplicatev2si" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_duplicate:V2SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%1" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "vec_shr_" + [(set (match_operand:V64I 0 "register_operand" "=r,r") + (unspec:V64I 
[(match_operand:V64I 1 "register_operand" "0,r") + (match_operand:SI 2 "immediate_operand" "S12S0,i")] + ARC64_UNSPEC_VEC_SHR))] + "TARGET_SIMD && TARGET_64BIT" + "asrl\\t%0,%1,%2" + [(set_attr "length" "4,8") + (set_attr "type" "asl")]) + +(define_insn "vec_shl_" + [(set (match_operand:V64I 0 "register_operand" "=r,r") + (unspec:V64I [(match_operand:V64I 1 "register_operand" "0,r") + (match_operand:SI 2 "immediate_operand" "S12S0,i")] + ARC64_UNSPEC_VEC_SHL))] + "TARGET_SIMD && TARGET_64BIT" + "asll\\t%0,%1,%2" + [(set_attr "length" "4,8") + (set_attr "type" "asl")]) + +;; Patterns used by vect_perm +(define_insn "arc64_dup_lane0v2si" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_duplicate:V2SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "r") + (parallel [(const_int 0)]) + )))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%1" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_dup_lane1v2si" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_duplicate:V2SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "r") + (parallel [(const_int 1)]) + )))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wm\\t%0,%1,%1" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_sel_lane0_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_concat:V2SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "r") + (parallel [(const_int 0)])) + (vec_select:SI + (match_operand:V2SI 2 "register_operand" "r") + (parallel [(const_int 0)])) + ))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_sel_lane1_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_concat:V2SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "r") + (parallel [(const_int 1)])) + (vec_select:SI + (match_operand:V2SI 2 "register_operand" "r") + (parallel [(const_int 1)])) + ))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wm\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_sel_lane2_0v4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2HI + (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 0) (const_int 2)]))))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_sel_lane3_1v4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2HI + (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 1) (const_int 3)]))))] + "TARGET_SIMD && TARGET_64BIT" + "vpack4hm\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_swaplv2si" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (vec_concat:V2SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "r") + (parallel [(const_int 1)])) + (vec_select:SI + (match_dup 1) + (parallel [(const_int 0)]))))] + "TARGET_64BIT" + "swapl\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "swapl")]) + +(define_insn "arc64_swapv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (unspec:V4HI + [(match_operand:V4HI 1 "register_operand" "r")] + ARC64_UNSPEC_SWAP))] + 
"TARGET_64BIT" + "swap\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "swap")]) + +(define_insn "arc64_swapv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (unspec:V2HI + [(match_operand:V2HI 1 "register_operand" "r")] + ARC64_UNSPEC_SWAP))] + "" + "swap\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "swap")]) + +(define_insn "arc64_swp_lane0_v4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2HI + (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 0) (const_int 1)])) + ))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_swp_lane1_v4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (vec_select:V2HI + (match_operand:V4HI 1 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)])) + (vec_select:V2HI + (match_operand:V4HI 2 "register_operand" "r") + (parallel [(const_int 2) (const_int 3)])) + ))] + "TARGET_SIMD && TARGET_64BIT" + "vpack2wm\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "*arc64_vsubadd3" + [(set (match_operand:VALL 0 "register_operand" "=r") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "r") + (match_operand:VALL 2 "register_operand" "r")] + ARC64_UNSPEC_VSUBADD))] + "TARGET_SIMD" + "vsubadd\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vaddsub")]) + +;; In 64b arches, we miss a shuffle pattern that swaps 16b pairs in a +;; 64b reg. In 32b arches, we miss a quick way to exchange 2 32b +;; regs. Hence, no support for v4hi. +(define_expand "cadd90v2si3" + [(set (match_operand:V2SI 0 "register_operand") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand") + (match_operand:V2SI 2 "register_operand")] + ARC64_UNSPEC_VSUBADD))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmp = gen_reg_rtx (V2SImode); + + emit_move_insn (tmp, gen_rtx_UNSPEC (V2SImode, + gen_rtvec (1, operands[2]), + ARC64_UNSPEC_SWAPL)); + operands[2] = tmp; + }) + +(define_expand "cadd90v2hi3" + [(set (match_operand:V2HI 0 "register_operand") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + ARC64_UNSPEC_VSUBADD))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmp = gen_reg_rtx (V2HImode); + + emit_move_insn (tmp, gen_rtx_UNSPEC (V2HImode, + gen_rtvec (1, operands[2]), + ARC64_UNSPEC_SWAP)); + operands[2] = tmp; + }) + +(define_insn "*arc64_vaddsub3" + [(set (match_operand:VALL 0 "register_operand" "=r") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "r") + (match_operand:VALL 2 "register_operand" "r")] + ARC64_UNSPEC_VADDSUB))] + "TARGET_SIMD" + "vaddsub\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vaddsub")]) + +;; In 64b arches, we miss a shuffle pattern that swaps 16b pairs in a +;; 64b reg. In 32b arches, we miss a quick way to exchange 2 32b +;; regs. Hence, no support for v4hi. 
+(define_expand "cadd270v2si3" + [(set (match_operand:V2SI 0 "register_operand") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand") + (match_operand:V2SI 2 "register_operand")] + ARC64_UNSPEC_VADDSUB))] + "TARGET_SIMD && TARGET_64BIT" + { + rtx tmp = gen_reg_rtx (V2SImode); + + emit_move_insn (tmp, gen_rtx_UNSPEC (V2SImode, + gen_rtvec (1, operands[2]), + ARC64_UNSPEC_SWAPL)); + operands[2] = tmp; + }) + +(define_expand "cadd270v2hi3" + [(set (match_operand:V2HI 0 "register_operand") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand") + (match_operand:V2HI 2 "register_operand")] + ARC64_UNSPEC_VADDSUB))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2HImode); + + emit_move_insn (tmp, gen_rtx_UNSPEC (V2HImode, + gen_rtvec (1, operands[2]), + ARC64_UNSPEC_SWAP)); + operands[2] = tmp; + }) + +;; Conversions. +(define_insn "arc64_truncate_lo_v2hi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (truncate:V2HI (match_operand:V2SI 1 "register_operand" "r")))] + "TARGET_SIMD && !TARGET_64BIT" + "vpack2hl\\t%0,%H1,%L1" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +(define_insn "arc64_truncate_hi_v4hi" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (vec_concat:V4HI + (match_operand:V2HI 1 "register_operand" "0") + (truncate:V2HI (match_operand:V2SI 2 "register_operand" "r"))))] + "TARGET_SIMD && !TARGET_64BIT" + "vpack2hl\\t%H0,%H2,%L2" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +;; Vector Pack: +;; 32bit: +;; vpack2hl RTl, RAh, RAl +;; vpack2hl RTh, RBh, RBl +;; 64bit: +;; vpack4hl RT, RA, RB +(define_expand "vec_pack_trunc_v2si" + [(set (match_operand:V4HI 0 "register_operand") + (vec_concat:V4HI + (truncate:V2HI + (match_operand:V2SI 1 "register_operand")) + (truncate:V2HI + (match_operand:V2SI 2 "register_operand")) + ))] + "TARGET_SIMD" + { + if (!TARGET_64BIT) + { + rtx tmp = gen_reg_rtx (V2HImode); + + emit_insn (gen_arc64_truncate_lo_v2hi (tmp, operands[1])); + emit_insn (gen_arc64_truncate_hi_v4hi (operands[0], tmp, operands[2])); + + DONE; + } + }) + +(define_insn "vec_pack_trunc_si" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_concat:V2HI + (truncate:HI + (match_operand:SI 1 "register_operand" "r")) + (truncate:HI + (match_operand:SI 2 "register_operand" "r")) + ))] + "TARGET_SIMD" + "vpack2hl\\t%0,%1,%2" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +(define_insn "vec_duplicatev2hi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_duplicate:V2HI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_SIMD" + "vpack2hl\\t%0,%1,%1" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +(define_insn "arc64_sel_lane0_v2hi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 0)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 0)])) + ))] + "TARGET_SIMD" + "vpack2hl\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_sel_lane1_v2hi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + (vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "r") + (parallel [(const_int 1)])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "r") + (parallel [(const_int 1)])) + ))] + "TARGET_SIMD" + "vpack2hm\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vpack")]) + +(define_insn "arc64_vpack_v2hihi" + [(set (match_operand:V2HI 0 "register_operand" "=r") + 
(vec_concat:V2HI + (match_operand:HI 1 "register_operand" "r") + (match_operand:HI 2 "register_operand" "r") + ))] + "TARGET_SIMD" + "vpack2hl\\t%0,%1,%2" + [(set_attr "type" "vpack") + (set_attr "length" "4")]) + +(define_insn "v2si3" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (MINMAX:V2SI (match_operand:V2SI 1 "register_operand" "%r") + (match_operand:V2SI 2 "register_operand" "r")))] + "TARGET_SIMD" + "v2\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "")]) + +(define_insn "*v2si3_dup" + [(set (match_operand:V2SI 0 "register_operand" "=r,r") + (MINMAX:V2SI + (vec_duplicate:V2SI + (match_operand 1 "vectdup_immediate_operand" "S06S0,S12S0")) + (match_operand:V2SI 2 "register_operand" "r,0")))] + "TARGET_SIMD" + "v2\\t%0,%2,%1" + [(set_attr "length" "4") + (set_attr "type" "")]) + +;; ------------------------------------------------------------------- +;; FP SIMD instructions +;; ------------------------------------------------------------------- + +(define_expand "mov" + [(set (match_operand:VALLF 0 "nonimmediate_operand") + (match_operand:VALLF 1 "general_operand"))] + "ARC64_HAS_FP_BASE" + " + if (arc64_prepare_move_operands (operands[0], operands[1], mode)) + DONE; + ") + +(define_expand "movmisalign" + [(set (match_operand:VALLF 0 "nonimmediate_operand") + (match_operand:VALLF 1 "general_operand"))] + "ARC64_HAS_FP_BASE && !STRICT_ALIGNMENT" + " + if (arc64_prepare_move_operands (operands[0], operands[1], mode)) + DONE; + ") + +(define_insn "*mov" + [(set (match_operand:VALLF_64 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*Ustor") + (match_operand:VALLF_64 1 "nonimmediate_operand" "w,Ufpms, w,*w,*r,*r,*m,*r"))] + "ARC64_HAS_FP_BASE + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + "@ + vfmov\\t%0,%1 + fld%U1\\t%0,%1 + fst%U0\\t%1,%0 + fmv2\\t%0,%1 + fmv2\\t%0,%1 + mov\\t%0,%1 + ld%U1\\t%0,%1 + st%U0\\t%1,%0" + [(set_attr "type" "fmov,ld,st,move,move,move,ld,st") + (set_attr "length" "4,*,*,4,4,4,*,*")]) + +;; The 128 bit moves need special care. 
+(define_insn_and_split "*mov" + [(set (match_operand:VALLF_128 0 "arc64_fsimd_moperand" "=w, w,Ufpms,*r,*w") + (match_operand:VALLF_128 1 "arc64_fsimd_moperand" "w,Ufpms, w,*w,*r"))] + "ARC64_HAS_FP_BASE + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + "@ + vfmov\\t%0,%1 + fld%U1\\t%0,%1 + fst%U0\\t%1,%0 + # + #" + "&& reload_completed + && arc64_split_double_move_p (operands, mode)" + [(const_int 0)] + { + arc64_split_double_move (operands, mode); + DONE; + } + [(set_attr "type" "fmov,ld,st,move,move") + (set_attr "length" "4,*,*,8,8")]) + +(define_insn "3" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (VOPS:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (match_operand:VALLF 2 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vf\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vf")]) + +;; We need a neg pattern (observed in specInt2006 481.wrf) +(define_expand "neg2" + [(set (match_operand:V1FRF 0 "arc64_fsimd_register" "=w") + (neg:V1FRF (match_operand:V1FRF 1 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "{ + rtx tmp = gen_reg_rtx (mode); + emit_move_insn (tmp, CONST0_RTX (mode)); + emit_insn (gen_vfnmadds (operands[0], operands[1], + tmp, operands[1])); + DONE; + }") + +(define_insn "vec_duplicate" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (vec_duplicate:VALLF (match_operand: 1 "register_operand" "w")))] + "ARC64_HAS_FP_BASE" + "vfrep\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "vfrep")]) + +(define_insn "3_rep" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (VOPS:VALLF + (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (vec_duplicate:VALLF + (match_operand: 2 "register_operand" "w"))))] + "ARC64_HAS_FP_BASE" + "vfs\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vf")]) + +;; Canonical of the above (selected) patterns. +(define_insn "3_rep2" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (VCOP:VALLF + (vec_duplicate:VALLF + (match_operand: 1 "register_operand" "w")) + (match_operand:VALLF 2 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vfs\\t%0,%2,%1" + [(set_attr "length" "4") + (set_attr "type" "vf")]) + +(define_expand "vec_set" + [(set (match_operand:VALLF 0 "register_operand") + (vec_merge:VALLF + (vec_duplicate:VALLF + (match_operand: 1 "register_operand")) + (match_dup 0) + (match_operand:SI 2 "immediate_operand")))] + "ARC64_HAS_FP_BASE" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + operands[2] = GEN_INT (elem); + }) + +(define_insn "*vec_set" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (vec_merge:VALLF + (vec_duplicate:VALLF + (match_operand: 1 "register_operand" "w")) + (match_operand:VALLF 3 "arc64_fsimd_register" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "ARC64_HAS_FP_BASE" + { + int elt = exact_log2 (INTVAL (operands[2])); + gcc_assert (UNSIGNED_INT5 (elt)); + operands[2] = GEN_INT (elt); + return "vfins\\t%0[%2],%1"; + } + [(set_attr "length" "4") + (set_attr "type" "vfins")]) + +(define_insn "vec_extract" + [(set (match_operand: 0 "register_operand" "=w") + (vec_select: (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (parallel [(match_operand:SI 2 "const_int_operand" "U05S0")])))] + "ARC64_HAS_FP_BASE" + "vfext\\t%0,%1[%2]" + [(set_attr "length" "4") + (set_attr "type" "vfext")]) + +;; FV

MADD +(define_insn "fma4" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (fma:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (match_operand:VALLF 2 "arc64_fsimd_register" "w") + (match_operand:VALLF 3 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vfmadd\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fmadd")]) + +;; FV

MSUB +(define_insn "fnma4" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (fma:VALLF (neg:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w")) + (match_operand:VALLF 2 "arc64_fsimd_register" "w") + (match_operand:VALLF 3 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vfmsub\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fmsub")]) + +(define_insn "fms4" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (fma:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (match_operand:VALLF 2 "arc64_fsimd_register" "w") + (neg:VALLF (match_operand:VALLF 3 "arc64_fsimd_register" "w"))))] + "!HONOR_SIGNED_ZEROS (mode) && ARC64_HAS_FP_BASE" + "vfnmsub\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmsub")]) + +;; -(op3 - (op1 * op2)) +(define_insn "*nfnms4" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (neg:VALLF (fma:VALLF (neg:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w")) + (match_operand:VALLF 2 "arc64_fsimd_register" "w") + (match_operand:VALLF 3 "arc64_fsimd_register" "w"))))] + "ARC64_HAS_FP_BASE" + "vfnmsub\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmsub")]) + +;; FV

NMADD +(define_insn "fnms4" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (fma:VALLF (neg:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w")) + (match_operand:VALLF 2 "arc64_fsimd_register" "w") + (neg:VALLF (match_operand:VALLF 3 "arc64_fsimd_register" "w"))))] + "!HONOR_SIGNED_ZEROS (mode) && ARC64_HAS_FP_BASE" + "vfnmadd\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmadd")]) + +;; -(op3 + (op1 * op2)) +(define_insn "*nfms4" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (neg:VALLF (fma:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (match_operand:VALLF 2 "arc64_fsimd_register" "w") + (match_operand:VALLF 3 "arc64_fsimd_register" "w"))))] + "ARC64_HAS_FP_BASE" + "vfnmadd\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmadd")]) + +;; FV

SQRT +(define_insn "sqrt2" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (sqrt:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vfsqrt\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "fsqrt")]) + +;; FV

MADDS +(define_insn "fma4_rep" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (fma:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (vec_duplicate:VALLF + (match_operand: 2 "register_operand" "w")) + (match_operand:VALLF 3 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vfmadds\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fmadd")]) + +(define_peephole2 + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "") + (vec_duplicate:VALLF (match_operand: 1 "register_operand" ""))) + (set (match_operand:VALLF 2 "arc64_fsimd_register" "") + (fma:VALLF (match_operand:VALLF 3 "arc64_fsimd_register" "") + (match_dup 0) + (match_operand:VALLF 4 "arc64_fsimd_register" "")))] + "ARC64_HAS_FP_BASE + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (fma:VALLF (match_dup 3) (vec_duplicate:VALLF (match_dup 1)) + (match_dup 4)))] + "") + +;; FV

MSUBS +(define_insn "fnma4_rep" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (fma:VALLF (neg:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w")) + (vec_duplicate:VALLF + (match_operand: 2 "register_operand" "w")) + (match_operand:VALLF 3 "arc64_fsimd_register" "w")))] + "ARC64_HAS_FP_BASE" + "vfmsubs\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fmsub")]) + +(define_peephole2 + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "") + (vec_duplicate:VALLF (match_operand: 1 "register_operand" ""))) + (set (match_operand:VALLF 2 "arc64_fsimd_register" "") + (fma:VALLF (neg:VALLF (match_operand:VALLF 3 "arc64_fsimd_register" "")) + (match_dup 0) + (match_operand:VALLF 4 "arc64_fsimd_register" "")))] + "ARC64_HAS_FP_BASE + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (fma:VALLF (neg:VALLF (match_dup 3)) (vec_duplicate:VALLF (match_dup 1)) + (match_dup 4)))] + "") + +;; FV

NMADDS +(define_insn "vfnmadds" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (neg:VALLF + (fma:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (vec_duplicate:VALLF + (match_operand: 2 "register_operand" "w")) + (match_operand:VALLF 3 "arc64_fsimd_register" "w"))))] + "ARC64_HAS_FP_BASE" + "vfnmadds\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmadd")]) + +;; FV

NMSUBS +(define_insn "vfnmsubs" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (neg:VALLF + (fma:VALLF (neg:VALLF (match_operand:VALLF 1 "arc64_fsimd_register" "w")) + (vec_duplicate:VALLF + (match_operand: 2 "register_operand" "w")) + (match_operand:VALLF 3 "arc64_fsimd_register" "w"))))] + "ARC64_HAS_FP_BASE" + "vfnmsubs\\t%0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "fnmsub")]) + +;; Exchange unspecs used for reduction ops. +(define_insn "arc64_dexch" + [(set (match_operand:VALLF_128 0 "register_operand" "=w") + (unspec:VALLF_128 [(match_operand:VALLF_128 1 "register_operand" "w")] + ARC64_UNSPEC_DEXCH))] + "ARC64_HAS_FP_BASE" + "vfdexch\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "vfexch")]) + +(define_insn "arc64_sexch" + [(set (match_operand:V1FRF 0 "register_operand" "=w") + (unspec:V1FRF [(match_operand:V1FRF 1 "register_operand" "w")] + ARC64_UNSPEC_SEXCH))] + "ARC64_HAS_FP_BASE" + "vfsexch\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "vfexch")]) + +(define_insn "arc64_hexch" + [(set (match_operand:VxHF 0 "register_operand" "=w") + (unspec:VxHF [(match_operand:VxHF 1 "register_operand" "w")] + ARC64_UNSPEC_HEXCH))] + "ARC64_HAS_FP_BASE" + "vfhexch\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "vfexch")]) + +(define_expand "reduc_plus_scal_v8hf" + [(match_operand:HF 0 "register_operand") + (match_operand:V8HF 1 "register_operand")] + "" + { + rtx low = gen_lowpart (HFmode, operands[1]); + rtx high = gen_reg_rtx (HFmode); + rtx tmp0, tmp1, tmp2, tmp3; + + tmp0 = gen_reg_rtx (V8HFmode); + tmp1 = gen_reg_rtx (V8HFmode); + tmp2 = gen_reg_rtx (V8HFmode); + tmp3 = gen_reg_rtx (V8HFmode); + + /* 1/2 of the vector. */ + emit_insn (gen_arc64_dexchv8hf (tmp0, operands[1])); + emit_insn (gen_addv8hf3 (tmp1, tmp0, operands[1])); + + /* 1/4 of the vector. */ + emit_insn (gen_arc64_sexchv8hf (tmp2, tmp1)); + emit_insn (gen_addv8hf3 (tmp3, tmp2, tmp1)); + + /* Last 2 elements. 
*/ + emit_insn (gen_vec_extractv8hfhf (high, tmp3, GEN_INT (1))); + emit_insn (gen_addhf3 (operands[0], high, low)); + DONE; + }) + +;; Vector reduction instructions (emulated) +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "register_operand") + (match_operand:V2xF 1 "register_operand")] + "" + { + rtx low = gen_lowpart (mode, operands[1]); + rtx high = gen_reg_rtx (mode); + + emit_insn (gen_vec_extract (high, operands[1], GEN_INT (1))); + emit_insn (gen_add3 (operands[0], high, low)); + DONE; + }) + +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "register_operand") + (match_operand:V4xF 1 "register_operand")] + "" + { +#if 0 + rtx op0 = gen_lowpart (mode, operands[1]); + rtx op1 = gen_reg_rtx (mode); + rtx op2 = gen_reg_rtx (mode); + rtx op3 = gen_reg_rtx (mode); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + + emit_insn (gen_vec_extract (op1, operands[1], GEN_INT (1))); + emit_insn (gen_add3 (tmp1, op1, op0)); + + if (mode == V4SFmode) + op2 = gen_lowpart (SFmode, gen_highpart (DFmode, operands[1])); + else + emit_insn (gen_vec_extract (op2, operands[1], GEN_INT (2))); + + emit_insn (gen_vec_extract (op3, operands[1], GEN_INT (3))); + emit_insn (gen_add3 (tmp2, op2, op3)); + + emit_insn (gen_add3 (operands[0], tmp1, tmp2)); + DONE; +#else + rtx low = gen_lowpart (mode, operands[1]); + rtx high = gen_reg_rtx (mode); + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (mode); + tmp1 = gen_reg_rtx (mode); + + emit_insn (gen_arc64_exch (tmp0, operands[1])); + emit_insn (gen_add3 (tmp1, tmp0, operands[1])); + + emit_insn (gen_vec_extract (high, tmp1, GEN_INT (1))); + emit_insn (gen_add3 (operands[0], high, low)); + DONE; +#endif + }) + +;; Emulated vector ops using scalar function, only for double width vectors. +;; MAX/MIN +(define_insn_and_split "3" + [(set (match_operand:W2xF 0 "arc64_fsimd_register" "=w") + (MINMAX:W2xF (match_operand:W2xF 1 "arc64_fsimd_register" "w") + (match_operand:W2xF 2 "arc64_fsimd_register" "w")))] + "ARC64_VFP_128" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx high_dest = gen_highpart (mode, operands[0]); + rtx low_dest = gen_lowpart (mode, operands[0]); + rtx high_op1 = gen_highpart (mode, operands[1]); + rtx low_op1 = gen_lowpart (mode, operands[1]); + rtx high_op2 = gen_highpart (mode, operands[2]); + rtx low_op2 = gen_lowpart (mode, operands[2]); + emit_insn (gen_3 (low_dest, low_op1, low_op2)); + emit_insn (gen_3 (high_dest, high_op1, high_op2)); + DONE; + } + [(set_attr "length" "8") + (set_attr "type" "f")]) + +;; NEG/ABS +(define_insn_and_split "2" + [(set (match_operand:W2xF 0 "arc64_fsimd_register" "=w") + (ABS_NEG:W2xF (match_operand:W2xF 1 "arc64_fsimd_register" "w")))] + "ARC64_VFP_128" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx high_dest = gen_highpart (mode, operands[0]); + rtx low_dest = gen_lowpart (mode, operands[0]); + rtx high_op1 = gen_highpart (mode, operands[1]); + rtx low_op1 = gen_lowpart (mode, operands[1]); + emit_insn (gen_2 (low_dest, low_op1)); + emit_insn (gen_2 (high_dest, high_op1)); + DONE; + } + [(set_attr "length" "8") + (set_attr "type" "fsgnjn")]) + +;; Conversions. 
+(define_expand "vec_pack_trunc_v2df" + [(set (match_operand:V4SF 0 "register_operand") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand")) + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand")) + ))] + "ARC64_VFP_128" + { + rtx high_dest = gen_lowpart (SFmode, + gen_highpart (DFmode, + operands[0])); + rtx low_dest = gen_lowpart (SFmode, operands[0]); + + rtx high_op1 = gen_highpart (DFmode, operands[1]); + rtx low_op1 = gen_lowpart (DFmode, operands[1]); + rtx high_op2 = gen_highpart (DFmode, operands[2]); + rtx low_op2 = gen_lowpart (DFmode, operands[2]); + rtx tmp1 = gen_reg_rtx (SFmode); + rtx tmp3 = gen_reg_rtx (SFmode); + + emit_insn (gen_truncdfsf2 (tmp3, high_op1)); + emit_insn (gen_truncdfsf2 (high_dest, low_op1)); + emit_insn (gen_truncdfsf2 (tmp1, high_op2)); + emit_insn (gen_truncdfsf2 (low_dest, low_op2)); + + emit_insn (gen_vec_setv4sf (operands[0], tmp1, GEN_INT (1))); + emit_insn (gen_vec_setv4sf (operands[0], tmp3, GEN_INT (3))); + DONE; + }) + +(define_expand "vec_pack_trunc_df" + [(set (match_operand:V2SF 0 "register_operand") + (vec_concat:V2SF + (float_truncate:SF + (match_operand:DF 1 "register_operand")) + (float_truncate:SF + (match_operand:DF 2 "register_operand")) + ))] + "ARC64_VFP_64" + { + rtx low_dest = gen_lowpart (SFmode, operands[0]); + rtx tmp1 = gen_reg_rtx (SFmode); + + emit_insn (gen_truncdfsf2 (low_dest, operands[2])); + emit_insn (gen_truncdfsf2 (tmp1, operands[1])); + emit_insn (gen_vec_setv2sf (operands[0], tmp1, GEN_INT (1))); + DONE; + }) + +;; vec_load_lanes used when wide_simd is off and wide_ldst is +;; on. Hence the simd lengthis 64bit + +;; Patterns used to vect permutate. + +;; This one pattern is only used when we don't want to make +;; dup_permutations using vec_dup (see arc64_simd_dup). +(define_insn "arc64_dup_lane0" + [(set (match_operand:VALLF 0 "arc64_fsimd_register" "=w") + (vec_duplicate:VALLF + (vec_select: + (match_operand:VALLF 1 "arc64_fsimd_register" "w") + (parallel [(const_int 0)]) + )))] + "ARC64_HAS_FP_BASE" + "vfrep\\t%0,%1" + [(set_attr "length" "4") + (set_attr "type" "vfrep")]) + +(define_insn "arc64_dup_lane1" + [(set (match_operand:W2xF 0 "arc64_fsimd_register" "=w") + (vec_duplicate:W2xF + (vec_select: + (match_operand:W2xF 1 "arc64_fsimd_register" "w") + (parallel [(const_int 1)]) + )))] + "ARC64_VFP_128" + "vfrep\\t%0,%H1" + [(set_attr "length" "4") + (set_attr "type" "vfrep")]) + +;; Shuffle patterns +(define_insn "arc64_d" + [(set (match_operand:VALLF_128 0 "register_operand" "=w") + (unspec:VALLF_128 [(match_operand:VALLF_128 1 "register_operand" "w") + (match_operand:VALLF_128 2 "register_operand" "w")] + PERMUTED))] + "ARC64_HAS_FP_BASE" + "vfd\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vf")]) + +(define_insn "arc64_s" + [(set (match_operand:V1FRF 0 "register_operand" "=w") + (unspec:V1FRF [(match_operand:V1FRF 1 "register_operand" "w") + (match_operand:V1FRF 2 "register_operand" "w")] + PERMUTES))] + "ARC64_HAS_FP_BASE" + "vfs\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vf")]) + +(define_insn "arc64_h" + [(set (match_operand:VxHF 0 "register_operand" "=w") + (unspec:VxHF [(match_operand:VxHF 1 "register_operand" "w") + (match_operand:VxHF 2 "register_operand" "w")] + PERMUTEH))] + "ARC64_HAS_FP_BASE" + "vfh\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vf")]) + +;; Required pattern needed for vector reduction operations. 
+;;(define_expand "vec_shr_" +;; [(match_operand:VALLF 0 "register_operand") +;; (match_operand:VALLF 1 "register_operand") +;; (match_operand:SI 2 "immediate_operand")] +;; "ARC64_HAS_FP_BASE" +;; { +;; if (arc64_expand_fvect_shr (operands)) +;; DONE; +;; FAIL; +;; }) + +;;(define_insn "vec_shr_" +;; [(set (match_operand:VALLF 0 "arc64_fsimd_moperand" "=w") +;; (unspec:VALLF [(match_operand:VALLF 1 "arc64_fsimd_moperand" "w") +;; (match_operand:SI 2 "immediate_operand")] +;; ARC64_UNSPEC_VEC_SHR))] +;; "ARC64_HAS_FP_BASE" +;; "vfasrl\\t%0,%1,%2" +;; [(set_attr "length" "4") +;; (set_attr "type" "asl")]) + + +(define_insn "*arc64_vfsubadd3" + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] + ARC64_UNSPEC_VFSUBADD))] + "ARC64_HAS_FP_BASE" + "vfsubadd\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vfsubadd")]) + +(define_expand "cadd903" + [(set (match_operand:VALLF 0 "register_operand") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand") + (match_operand:VALLF 2 "register_operand")] + ARC64_UNSPEC_VFSUBADD))] + "ARC64_HAS_FP_BASE" + { + rtx tmp = gen_reg_rtx (mode); + + emit_move_insn (tmp, gen_rtx_UNSPEC (mode, + gen_rtvec (1, operands[2]), + ARC64_UNSPEC_EXCH)); + operands[2] = tmp; + }) + +(define_insn "*arc64_vfaddsub3" + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] + ARC64_UNSPEC_VFADDSUB))] + "ARC64_HAS_FP_BASE" + "vfaddsub\\t%0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vfaddsub")]) + +(define_expand "cadd2703" + [(set (match_operand:VALLF 0 "register_operand") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand") + (match_operand:VALLF 2 "register_operand")] + ARC64_UNSPEC_VFADDSUB))] + "ARC64_HAS_FP_BASE" + { + rtx tmp = gen_reg_rtx (mode); + + emit_move_insn (tmp, gen_rtx_UNSPEC (mode, + gen_rtvec (1, operands[2]), + ARC64_UNSPEC_EXCH)); + operands[2] = tmp; + }) diff --git a/gcc/config/arc64/atomic.md b/gcc/config/arc64/atomic.md new file mode 100644 index 0000000000000..dd9a81be28561 --- /dev/null +++ b/gcc/config/arc64/atomic.md @@ -0,0 +1,268 @@ +;; GCC machine description for ARC atomic instructions. +;; Copyright (C) 2015-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Operations which can be used with atomic loads and stores. +(define_code_iterator ATOPS [plus minus ior xor and]) + +;; Operations which are supported by hardware. 
+(define_code_iterator ATHWOPS [plus ior xor and]) + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] ARC64_UNSPEC_MEMBAR))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] ARC64_UNSPEC_MEMBAR))] + "" + { + return "dmb\\t3"; + } + [(set_attr "type" "dmb") + (set_attr "length" "4")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool out + (match_operand:ALLI 1 "register_operand" "") ;; val out + (match_operand:ALLI 2 "mem_noofs_operand" "");; memory + (match_operand:ALLI 3 "register_operand" "") ;; expected + (match_operand:ALLI 4 "register_operand" "") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f + "ARC64_HAS_ATOMIC_1" +{ + arc64_expand_compare_and_swap (operands); + DONE; +}) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (reg:CC_Z CC_REGNUM) ;; bool out + (unspec_volatile:CC_Z [(const_int 0)] ARC64_VUNSPEC_CAS)) + (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out + (match_operand:GPI 1 "mem_noofs_operand" "+ATOMC")) ;; memory + (set (match_dup 1) + (unspec_volatile + [(match_operand:GPI 2 "register_operand" "r") ;; expect + (match_operand:GPI 3 "register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + ARC64_VUNSPEC_CAS))] + "ARC64_HAS_ATOMIC_1" + "#" + "&& reload_completed" + [(const_int 0)] + { + arc64_split_compare_and_swap (operands); + DONE; + }) + +(define_insn "arc_load_exclusive" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec_volatile:GPI + [(match_operand:GPI 1 "mem_noofs_operand" "ATOMC")] + ARC64_VUNSPEC_LL))] + "ARC64_HAS_ATOMIC_1" + "llock\\t%0,%1" + [(set_attr "type" "llock") + (set_attr "iscompact" "no") + (set_attr "predicable" "no") + (set_attr "length" "*")]) + +(define_insn "arc_store_exclusive" + [(set (match_operand:GPI 0 "mem_noofs_operand" "=ATOMC") + (unspec_volatile:GPI[(match_operand:GPI 1 "register_operand" "r")] + ARC64_VUNSPEC_SC)) + (clobber (reg:CC_Z CC_REGNUM))] + "ARC64_HAS_ATOMIC_1" + "scond\\t%1,%0" + [(set_attr "type" "scond") + (set_attr "iscompact" "no") + (set_attr "predicable" "no") + (set_attr "length" "*")]) + +(define_expand "atomic_exchangesi" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "mem_noofs_operand" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "ARC64_HAS_ATOMIC_1" +{ + enum memmodel model = (enum memmodel) INTVAL (operands[3]); + + if (model == MEMMODEL_SEQ_CST) + emit_insn (gen_sync ()); + emit_insn (gen_exchangesi (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "exchange" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec_volatile:GPI [(match_operand:GPI 1 "mem_noofs_operand" "+ATOMC")] + ARC64_VUNSPEC_EX)) + (set (match_dup 1) + (match_operand:GPI 2 "register_operand" "0"))] + "" + "ex\\t%0,%1" + [(set_attr "type" "ex") + (set_attr "iscompact" "no") + (set_attr "predicable" "no") + (set_attr "length" "*")]) + +;; New Atomic options enabled by option 2 +(define_insn_and_split "atld_" + [(set (match_operand:GPI 0 "register_operand" "=&r,r") + (match_operand:GPI 1 "mem_noofs_operand" 
"+ATOMC,ATOMC")) + (set (match_dup 1) + (unspec_volatile:GPI + [(ATHWOPS:GPI (match_dup 0) + (match_operand:GPI 2 "register_operand" "0,r")) + (match_operand:SI 3 "const_int_operand")] + ARC64_VUNSPEC_ATOOPS))] + "ARC64_HAS_ATOMIC_2" + "@ + atld.%A3\\t%0,%1 + #" + "&& reload_completed && !operands_match_p (operands[0], operands[2])" + [(const_int 0)] + { + emit_insn (gen_rtx_SET (operands[0], operands[2])); + emit_insn (gen_atld_ (operands[0], operands[1], operands[0], operands[3])); + DONE; + } + [(set_attr "type" "atldop")]) + +(define_expand "atomic_" + [(match_operand:GPI 0 "mem_noofs_operand" "") ;; memory + (ATOPS:GPI (match_dup 0) + (match_operand:GPI 1 "register_operand" "")) ;; operand + (match_operand:SI 2 "const_int_operand" "")] ;; model + "ARC64_HAS_ATOMIC_1" +{ + arc64_expand_atomic_op (, operands[0], operands[1], + NULL_RTX, NULL_RTX, operands[2]); + DONE; +}) + +(define_expand "atomic_nandsi" + [(match_operand:SI 0 "mem_noofs_operand" "") ;; memory + (match_operand:SI 1 "register_operand" "") ;; operand + (match_operand:SI 2 "const_int_operand" "")] ;; model + "ARC64_HAS_ATOMIC_1" +{ + arc64_expand_atomic_op (NOT, operands[0], operands[1], + NULL_RTX, NULL_RTX, operands[2]); + DONE; +}) + +(define_expand "atomic_fetch_" + [(set (match_operand:GPI 0 "register_operand") ;; output + (match_operand:GPI 1 "mem_noofs_operand")) ;; memory + (set (match_dup 1) + (unspec_volatile:GPI + [(ATHWOPS:GPI (match_dup 1) + (match_operand:GPI 2 "register_operand")) ;; operand + (match_operand:SI 3 "const_int_operand")] ;; model + ARC64_VUNSPEC_ATOOPS))] + "ARC64_HAS_ATOMIC_1" + { + if (!ARC64_HAS_ATOMIC_2) + { + arc64_expand_atomic_op (, operands[1], operands[2], + operands[0], NULL_RTX, operands[3]); + DONE; + } + if (!ARC64_HAS_ATOMIC_3) + arc64_pre_atomic_barrier ((enum memmodel) INTVAL (operands[3])); + emit_insn (gen_atld_ (operands[0], operands[1], operands[2], operands[3])); + if (!ARC64_HAS_ATOMIC_3) + arc64_post_atomic_barrier ((enum memmodel) INTVAL (operands[3])); + DONE; + }) + +;; ARCv3 doesn't have a MINUS atomic memory operation. 
+(define_expand "atomic_fetch_sub" + [(set (match_operand:GPI 0 "register_operand") ;; output + (match_operand:GPI 1 "mem_noofs_operand")) ;; memory + (set (match_dup 1) + (unspec_volatile:GPI + [(minus:GPI (match_dup 1) + (match_operand:GPI 2 "register_operand")) ;; operand + (match_operand:SI 3 "const_int_operand")] ;; model + ARC64_VUNSPEC_ATOOPS))] + "ARC64_HAS_ATOMIC_1" + { + arc64_expand_atomic_op (MINUS, operands[1], operands[2], + operands[0], NULL_RTX, operands[3]); + DONE; + }) + +(define_expand "atomic_fetch_nand" + [(match_operand:GPI 0 "register_operand" "") ;; output + (match_operand:GPI 1 "mem_noofs_operand" "") ;; memory + (match_operand:GPI 2 "register_operand" "") ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "ARC64_HAS_ATOMIC_1" +{ + arc64_expand_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL_RTX, operands[3]); + DONE; +}) + +(define_expand "atomic__fetch" + [(match_operand:GPI 0 "register_operand" "") ;; output + (match_operand:GPI 1 "mem_noofs_operand" "") ;; memory + (ATOPS:GPI (match_dup 1) + (match_operand:GPI 2 "register_operand" "")) ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "ARC64_HAS_ATOMIC_1" +{ + arc64_expand_atomic_op (, operands[1], operands[2], + NULL_RTX, operands[0], operands[3]); + DONE; +}) + +(define_expand "atomic_nand_fetch" + [(match_operand:GPI 0 "register_operand" "") ;; output + (match_operand:GPI 1 "mem_noofs_operand" "") ;; memory + (match_operand:GPI 2 "register_operand" "") ;; operand + (match_operand:SI 3 "const_int_operand" "")] ;; model + "ARC64_HAS_ATOMIC_1" +{ + arc64_expand_atomic_op (NOT, operands[1], operands[2], + NULL_RTX, operands[0], operands[3]); + DONE; +}) + + +;; mode:emacs-lisp +;; comment-start: ";; " +;; eval: (set-syntax-table (caopy-sequence (syntax-table))) +;; eval: (modify-syntax-entry ?[ "(]") +;; eval: (modify-syntax-entry ?] ")[") +;; eval: (modify-syntax-entry ?{ "(}") +;; eval: (modify-syntax-entry ?} "){") +;; eval: (setq indent-tabs-mode t) +;; End: diff --git a/gcc/config/arc64/builtins.def b/gcc/config/arc64/builtins.def new file mode 100644 index 0000000000000..7bd063a20c651 --- /dev/null +++ b/gcc/config/arc64/builtins.def @@ -0,0 +1,42 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This file contains the definitions and documentation for the + builtins defined in the ARC part of the GNU compiler. Before + including this file, define a macro + + DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) + + NAME: `__builtin_arc_name' will be the user-level name of the builtin. + `ARC64_BUILTIN_NAME' will be the internal builtin's id. + N_ARGS: Number of input arguments. If special treatment is needed, + set to -1 and handle it by hand, see arc.c:arc_expand_builtin(). + TYPE: A tree node describing the prototype of the built-in. + ICODE: Name of attached insn or expander. 
If special treatment in arc.c + is needed to expand the built-in, use `nothing'. + MASK: CPU selector mask. */ + +DEF_BUILTIN (NOP, 0, void_ftype_void, nothing, 1) +DEF_BUILTIN (SR, 2, void_ftype_usint_usint, sr, 1) +DEF_BUILTIN (LR, 1, usint_ftype_usint, lr, 1) +DEF_BUILTIN (BRK, 0, void_ftype_void, brk, 1) +DEF_BUILTIN (FLAG, 1, void_ftype_usint, flag, 1) +DEF_BUILTIN (SRL, 2, void_ftype_long_long, srl, 1) +DEF_BUILTIN (LRL, 1, long_ftype_long, lrl, 1) + +DEF_BUILTIN (TRAP_S, 1, void_ftype_usint, trap_s, 1) diff --git a/gcc/config/arc64/condexec.md b/gcc/config/arc64/condexec.md new file mode 100644 index 0000000000000..6fc6125cb3b1a --- /dev/null +++ b/gcc/config/arc64/condexec.md @@ -0,0 +1,392 @@ +;; Operations which can be predicated non commutative +(define_code_iterator ARITHP [ashift ashiftrt lshiftrt]) + +;; Conditional execution +(define_insn_and_split "*zero_extend2_ce" + [(cond_exec + (match_operator 2 "arc64_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "0,r"))))] + "" + "@ + bmsk.%m2\\t%0,%1, + #" + "reload_completed && (REGNO (operands[0]) != REGNO (operands[1]))" + [(cond_exec + (match_op_dup 2 [(match_dup 3) (const_int 0)]) + (set (match_dup 4) (match_dup 1))) + (cond_exec + (match_op_dup 2 [(match_dup 3) (const_int 0)]) + (set (match_dup 0) (zero_extend: (match_dup 4))))] + " + operands[4] = simplify_gen_subreg (mode, operands[0], + mode, 0); + " + [(set_attr "type" "and") + (set_attr "length" "4,8")]) + +;; Non-commutative operation, still I can swap the input operands if +;; it is required. +;; Todo: add conditional execution for leu and geu +;; Todo: Add for DI +(define_insn_and_split "*setsi_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (SETCC:SI (match_operand:SI 1 "register_operand" "0,0,r,r") + (match_operand:SI 2 "arc64_nonmem_operand" "r,n,r,n"))))] + "" + "@ + set.%m3\\t%0,%1,%2 + set.%m3\\t%0,%1,%2 + # + #" + "reload_completed && (!rtx_equal_p (operands[0], operands[1]))" + [(const_int 0)] + " + { + rtx tmp; + rtx cond = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[4]), + operands[4], const0_rtx); + + enum rtx_code code = ; + + if (register_operand (operands[2], SImode) + && rtx_equal_p (operands[0], operands[2])) + { + /* we need to reverse any condition besides NE/EQ. */ + if (code != NE && code !=EQ) + code = reverse_condition (code); + tmp = gen_rtx_fmt_ee (code, SImode, operands[2], operands[1]); + } + else + { + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], operands[1]))); + tmp = gen_rtx_fmt_ee (code, SImode, operands[0], operands[2]); + } + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], tmp))); + DONE; + } + " + [(set_attr "type" "setcc") + (set_attr "length" "4,8,8,12")]) + +;; Non commutative operation FIXME! 
what about op2 == op0 +(define_insn_and_split "*rotrsi_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (rotatert:SI (match_operand:SI 1 "register_operand" "0,0,r,r") + (match_operand:SI 2 "nonmemory_operand" "rU06S0,S32S0,U06S0,S32S0"))))] + "" + "@ + ror.%m3\\t%0,%1,%2 + ror.%m3\\t%0,%1,%2 + # + #" + "reload_completed && (!rtx_equal_p (operands[0], operands[1]))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (match_dup 1))) + (cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (rotatert:SI (match_dup 0) (match_dup 2))))] + "" + [(set_attr "type" "ror") + (set_attr "length" "4,8,8,12")]) + +;; FIXME! what about op2 == op0 +(define_insn_and_split "*_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "= r,r,r,r") + (ARITHP:GPI (match_operand:GPI 1 "register_operand" " 0,0,r,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S32S0,U06S0,S32S0"))))] + "" + "@ + .%m3\\t%0,%1,%2 + .%m3\\t%0,%1,%2 + # + #" + "reload_completed && (!rtx_equal_p (operands[0], operands[1]))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (match_dup 1))) + (cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (ARITHP:GPI (match_dup 0) (match_dup 2))))] + "" + [(set_attr "length" "4,8,8,12") + (set_attr "type" "")]) + +(define_insn_and_split "*_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "= r,r,r,r") + (DIVREM:GPI (match_operand:GPI 1 "register_operand" " 0,0,r,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S32S0,U06S0,S32S0"))))] + "TARGET_ARC64_DIVREM" + "@ + .%m3\\t%0,%1,%2 + .%m3\\t%0,%1,%2 + # + #" + "reload_completed && (!rtx_equal_p (operands[0], operands[1]))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (match_dup 1))) + (cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (DIVREM:GPI (match_dup 0) (match_dup 2))))] + "" + [(set_attr "length" "4,8,8,12") + (set_attr "type" "")]) + +;;ToDo: Add predicated SUBx patterns, for efficient handling of the +;;short immediate field. 
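+;; Predicated SUB.  When the destination register is tied to the second
+;; input operand, the templates below use RSUB with the source operands
+;; swapped; after reload, if neither input is tied to the destination,
+;; the splitter first emits a conditional copy of one input into the
+;; destination and then performs the conditional subtraction against it.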
+(define_insn_and_split "*sub_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "= r, r, r, r, r, r,r") + (minus:GPI (match_operand:GPI 1 "nonmemory_operand" " 0,rU06S0,S32S0, 0, r,S32S0,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0, 0, 0,S32S0,rU06S0, r,S32S0"))))] + "(register_operand (operands[1], mode) + || register_operand (operands[2], mode))" + "@ + sub.%m3\\t%0,%1,%2 + rsub.%m3\\t%0,%2,%1 + rsub.%m3\\t%0,%2,%1 + sub.%m3\\t%0,%1,%2 + # + # + #" + "&& reload_completed + && (!((register_operand (operands[1], mode) + && rtx_equal_p (operands[0], operands[1])) + || (register_operand (operands[2], mode) + && rtx_equal_p (operands[0], operands[2]))))" + [(const_int 0)] + " + { + rtx cond = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[4]), + operands[4], const0_rtx); + + if (register_operand (operands[1], mode) + && (REGNO (operands[0]) != REGNO (operands[1]))) + { + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], operands[1]))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], + gen_rtx_MINUS (mode, + operands[0], + operands[2])))); + DONE; + } + + if (register_operand (operands[2], mode) + && (REGNO (operands[0]) != REGNO (operands[2]))) + { + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], operands[2]))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], + gen_rtx_MINUS (mode, + operands[1], + operands[0])))); + DONE; + } + gcc_unreachable (); + } + " + [(set_attr "length" "4,4,8,8,8,12,12") + (set_attr "type" "sub")]) + +;; commutative MIN, MAX +(define_insn_and_split "*_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "=r,r,r,r") + (MINMAX:GPI (match_operand:GPI 1 "register_operand" "%0,0,r,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S32S0,rU06S0,S32S0"))))] + "" + "@ + .%m3\\t%0,%1,%2 + .%m3\\t%0,%1,%2 + # + #" + "reload_completed && (!rtx_equal_p (operands[0], operands[1]))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (MINMAX:GPI (match_dup 0) (match_dup 2))))] + " + { + rtx cond = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[4]), + operands[4], const0_rtx); + /* Check first if the second input reg-operand is the same as the output + reg-operand. 
*/ + if (rtx_equal_p (operands[0], operands[2])) + std::swap (operands[1], operands[2]); + else + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], operands[1]))); + } + " + [(set_attr "type" "") + (set_attr "length" "4,8,8,12")]) + +(define_insn_and_split "*mul3_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "= r, r,r,r") + (mult:GPI (match_operand:GPI 1 "register_operand" "% 0, 0,r,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S32S0,rU06S0,S32S0"))))] + "" + "@ + mpy.%m3\\t%0,%1,%2 + mpy.%m3\\t%0,%1,%2 + # + #" + "reload_completed && (!rtx_equal_p (operands[0], operands[1]))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (mult:GPI (match_dup 0) (match_dup 2))))] + " + { + rtx cond = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[4]), + operands[4], const0_rtx); + /* Check first if the second input reg-operand is the same as the output + reg-operand. */ + if (rtx_equal_p (operands[0], operands[2])) + std::swap (operands[1], operands[2]); + else + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], operands[1]))); + } + " + [(set_attr "length" "4,8,8,12") + (set_attr "type" "mpy")]) + +(define_insn_and_split "*_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:GPI 0 "register_operand" "= r, r, r, r, r,r") + (COMMUTATIVEF:GPI + (match_operand:GPI 1 "nonmemory_operand" " 0,S32S0, 0, r,S32S0,r") + (match_operand:GPI 2 "nonmemory_operand" "rU06S0, 0,S32S0,rU06S0, r,S32S0"))))] + "(register_operand (operands[1], mode) + || register_operand (operands[2], mode))" + "@ + .%m3\\t%0,%1,%2 + .%m3\\t%0,%2,%1 + .%m3\\t%0,%1,%2 + # + # + #" + "&& reload_completed + && ((register_operand (operands[1], mode) + && (REGNO (operands[0]) != REGNO (operands[1]))) + || (REGNO (operands[0]) != REGNO (operands[2])))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (COMMUTATIVEF:GPI (match_dup 0) (match_dup 2))))] + " + { + rtx cond = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[4]), + operands[4], const0_rtx); + if (!register_operand (operands[1], mode) + && (REGNO (operands[0]) != REGNO (operands[2]))) + std::swap (operands[1], operands[2]); + if (register_operand (operands[2], mode) + && (REGNO (operands[0]) == REGNO (operands[2]))) + std::swap (operands[1], operands[2]); + else + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (operands[0], operands[1]))); + } + " + [(set_attr "length" "4,8,8,8,12,12") + (set_attr "type" "")]) + +(define_insn_and_split "*mulhisi3r_ce" + [(cond_exec + (match_operator 3 "arc64_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (set (match_operand:SI 0 "register_operand" "=r,r") + (mult:SI + (ANY_EXTEND:SI (match_operand:HI 1 "register_operand" "%0,r")) + (ANY_EXTEND:SI (match_operand:HI 2 "register_operand" "r,r")))))] + "" + "@ + mpyw.%m3\\t%0,%1,%2 + #" + "reload_completed && (REGNO (operands[0]) != REGNO (operands[1]))" + [(cond_exec + (match_op_dup 3 [(match_dup 4) (const_int 0)]) + (set (match_dup 0) (mult:SI (ANY_EXTEND:SI (match_dup 5)) + (ANY_EXTEND:SI (match_dup 2)))))] + " + { + rtx cond = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[4]), + operands[4], const0_rtx); + /* Check first if the second input reg-operand is the same as the output + reg-operand. 
*/ + if (REGNO (operands[0]) == REGNO (operands[2])) + { + std::swap (operands[1], operands[2]); + operands[5] = operands[1]; + } + else + { + rtx tmp = simplify_gen_subreg (HImode, operands[0], SImode, 0); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, cond, + gen_rtx_SET (tmp, operands[1]))); + operands[5] = tmp; + } + } + " + [(set_attr "length" "4,8") + (set_attr "type" "mpy")]) + +(define_insn_and_split "*sign_extendsi2_ce" + [(cond_exec + (match_operator 2 "arc64_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI + (match_operand:SHORT 1 "nonimmediate_operand" "r"))))] + "" + "#" + "reload_completed" + [(cond_exec + (match_op_dup 2 [(match_dup 3) (const_int 0)]) + (set (match_dup 0) (ashift:SI (match_dup 1) (const_int )))) + (cond_exec + (match_op_dup 2 [(match_dup 3) (const_int 0)]) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int ))))] + " + operands[1] = simplify_gen_subreg (SImode, operands[1], mode, 0); + " + [(set_attr "type" "asl") + (set_attr "length" "8")]) + +;; mode:emacs-lisp +;; comment-start: ";; " +;; eval: (set-syntax-table (copy-sequence (syntax-table))) +;; eval: (modify-syntax-entry ?[ "(]") +;; eval: (modify-syntax-entry ?] ")[") +;; eval: (modify-syntax-entry ?{ "(}") +;; eval: (modify-syntax-entry ?} "){") +;; eval: (setq indent-tabs-mode t) +;; End: diff --git a/gcc/config/arc64/constraints.md b/gcc/config/arc64/constraints.md new file mode 100644 index 0000000000000..8e77716f9d7c1 --- /dev/null +++ b/gcc/config/arc64/constraints.md @@ -0,0 +1,424 @@ +;; Constraint definitions for Synopsys DesignWare ARC. +;; Copyright (C) 2019 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints + +;; Register suited for short instructions. +(define_register_constraint "q" "AC16_REGS" + "Registers usable in short 16-bit like instructions: @code{r0}-@code{r3}, +@code{r12}-@code{r15}") + +;; Register suited for sibling calls. +(define_register_constraint "Sbreg" "SIBCALL_REGS" + "@internal + Sibling call register") + +(define_register_constraint "w" "ARC64_HAS_FP_BASE ? FP_REGS : NO_REGS" + "Floating point and SIMD vector registers.") + +;; Register suited for floating point instructions [r0-r31] +(define_register_constraint "c" "CORE_REGS" + "@internal + Core register set") + +;; Register suited for mov_s g,h instructions like. +;; FIXME! check if we get better code when it is register_constraint. 
+(define_constraint "h" + "@internal + Register usable in short move instructions: @code{r0}-@code{r31}" + (and (match_code "reg") + (match_test "REGNO (op) < ILINK_REGNUM"))) + +(define_constraint "accum" + "@internal" + (and (match_code "reg") + (match_test "REGNO (op) == R58_REGNUM"))) + +(define_constraint "accrn" + "@internal" + (and (match_code "reg") + (match_test "REGNO (op) == R58_REGNUM"))) + +; Usc constant is only used for storing long constants, hence we can +; have only [b,s9], and [b] types of addresses. +(define_memory_constraint "Ucnst" "@internal + A valid memory operand for storing constants" + (and (match_code "mem") + (match_test "!CONSTANT_P (XEXP (op, 0))") + (match_test "arc64_legitimate_store_address_p (mode, XEXP (op, 0))"))) + +(define_memory_constraint "Uldms" "@internal + A valid memory operand for loading using short instructions" + (and (match_code "mem") + (match_test "arc64_short_access_p (op, mode, true)"))) + +(define_memory_constraint "Ustms" "@internal + A valid memory operand for loading using short instructions" + (and (match_code "mem") + (match_test "arc64_short_access_p (op, mode, false)"))) + +(define_memory_constraint "Ufpms" "@internal + A valid memory operand for floating point operations" + (and (match_code "mem") + (match_test "arc64_fp_access_p (op, mode)"))) + +(define_memory_constraint "Ustor" "@internal + A valid memory operand for store instructions" + (and (match_code "mem") + (match_test "arc64_legitimate_store_address_p (mode, XEXP (op, 0))"))) + +(define_memory_constraint "Ustw6" "@internal + A valid memory operand for restricted storing of w6 immediate" + (and (match_code "mem") + (match_test "!MEM_VOLATILE_P (op) || !TARGET_VOLATILE_DI") + (match_test "arc64_legitimate_store_address_p (mode, XEXP (op, 0))"))) + +(define_constraint "Ustk<" + "@internal + Stack pre-decrement" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REGNUM"))) + +(define_constraint "Ustk>" + "@internal + Stack post-increment" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == POST_INC") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REGNUM"))) + +;;; Internal immediate constraint used to split move instructions. +;(define_constraint "Cax" +; "@internal +; special const_int pattern used to split move insns" +; (and (match_code "const_int") +; (match_test "optimize_size") +; (match_test "arc_check_mov_const (ival)"))) + +(define_constraint "BLsym" + "@internal + is a symbol reference allowed by the BL instruction" + (and (match_code "symbol_ref") + (match_test "!arc64_is_long_call_p (op)"))) + +(define_constraint "U06M1" + "@internal + An unsigned 6-bit integer constant, up to 62." + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (ival + 1)"))) + +(define_constraint "SymMV" + "@internal + Special constant/symbol which fits in limm field." + (and (match_code "label_ref, symbol_ref") + (match_test "arc64_allow_direct_access_p (op)"))) + +(define_constraint "SymIm" + "@internal + Special constant/symbol which fits in limm field." + (ior (match_code "label_ref, symbol_ref") + (and (match_code "const_int") + (match_test "SIGNED_INT32 (ival)")))) + +(define_constraint "S32S0" + "@internal + Special constant/symbol which fits in limm field." 
+ (and (match_code "const_int") + (ior (match_test "UNSIGNED_INT32 (ival)") + (match_test "SIGNED_INT32 (ival)")))) + +(define_constraint "U32S0" + "@internal + Special constant/symbol which fits in limm field." + (and (match_code "const_int") + (match_test "UNSIGNED_INT32 (ival)"))) + +(define_constraint "S06S0" "@internal + A 6-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT6 (ival)"))) + +(define_constraint "SyPic" + "@internal + Special symbol used for PIC addressing." + (match_code "unspec")) + +(define_constraint "U06Sx" "@internal + A 6-bit unsigned integer constant shifted by x-bit(s)" + (and (match_code "const_int") + (ior (match_test "UNSIGNED_INT9_SHIFTED (ival,3)") + (match_test "UNSIGNED_INT8_SHIFTED (ival,2)") + (match_test "UNSIGNED_INT7_SHIFTED (ival,1)") + (match_test "UNSIGNED_INT6 (ival)")))) + +(define_constraint "N06Sx" "@internal + A negate 6-bit unsigned integer constant shifted by x-bit(s) used by add." + (and (match_code "const_int") + (match_test "ival < 0") + (match_test "SIGNED_INT10(ival)") + (ior (match_test "UNSIGNED_INT9_SHIFTED (-ival,3)") + (match_test "UNSIGNED_INT8_SHIFTED (-ival,2)") + (match_test "UNSIGNED_INT7_SHIFTED (-ival,1)") + (match_test "UNSIGNED_INT6 (-ival)")))) + +(define_constraint "S12Sx" "@internal + A 12-bit signed integer constant shifted by x-bit(s)" + (and (match_code "const_int") + (ior (match_test "SIGNED_INT15_SHIFTED (ival,3)") + (match_test "SIGNED_INT14_SHIFTED (ival,2)") + (match_test "SIGNED_INT13_SHIFTED (ival,1)") + (match_test "SIGNED_INT12 (ival)")))) + +(define_constraint "S03MV" "@internal + A 3-bit Integer signed constant in the interval [-1,6]" + (and (match_code "const_int") + (match_test "(ival >= -1) && (ival <= 6)"))) + +(define_constraint "Z" + "Match single precision and a floating-point zero" + (and (match_code "const_double") + (ior (match_test "op == CONST0_RTX (DFmode) + || op == CONST0_RTX (SFmode)")))) + +(define_constraint "G" "@internal + Match single precision and a floating-point zero" + (and (match_code "const_double") + (ior (match_test "op == CONST0_RTX (DFmode)") + (match_test "GET_MODE_SIZE (GET_MODE (op)) <= 4")))) + +(define_constraint "U0000" "@internal + Match const int 0" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "U0001" "@internal + Match const int 1" + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "U0008" "@internal + Match const int 8" + (and (match_code "const_int") + (match_test "ival == 8"))) + +(define_constraint "U0016" "@internal + Match const int 16" + (and (match_code "const_int") + (match_test "ival == 16"))) + +;--------------------------------------------------------- + +(define_constraint "U06S0" "@internal + A 6-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT6 (ival)"))) + +(define_constraint "U06S1" "@internal + A 6-bit unsigned integer constant shifted by 1-bit(s)" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT7_SHIFTED (ival,1)"))) + +(define_constraint "U06S2" "@internal + A 6-bit unsigned integer constant shifted by 2-bit(s)" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT8_SHIFTED (ival,2)"))) + +(define_constraint "U06S3" "@internal + A 6-bit unsigned integer constant shifted by 3-bit(s)" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT9_SHIFTED (ival,3)"))) + +(define_constraint "S12S0" "@internal + A 12-bit signed integer constant" + (and + (match_code "const_int") + 
(match_test "SIGNED_INT12 (ival)"))) + +(define_constraint "S12S1" "@internal + A 12-bit signed integer constant shifted by 1-bit(s)" + (and + (match_code "const_int") + (match_test "SIGNED_INT13_SHIFTED (ival,1)"))) + +(define_constraint "S12S2" "@internal + A 12-bit signed integer constant shifted by 2-bit(s)" + (and + (match_code "const_int") + (match_test "SIGNED_INT14_SHIFTED (ival,2)"))) + +(define_constraint "S12S3" "@internal + A 12-bit signed integer constant shifted by 3-bit(s)" + (and + (match_code "const_int") + (match_test "SIGNED_INT15_SHIFTED (ival,3)"))) + +(define_constraint "S03S0" "@internal + A 3-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT3 (ival)"))) + +(define_constraint "U07S0" "@internal + A 7-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT7 (ival)"))) + +(define_constraint "U03S0" "@internal + A 3-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT3 (ival)"))) + +(define_constraint "S11S0" "@internal + A 11-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT11 (ival)"))) + +(define_constraint "U05S0" "@internal + A 5-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT5 (ival)"))) + +(define_constraint "S09S0" "@internal + A 9-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT9 (ival)"))) + +(define_constraint "S21S0" "@internal + A 21-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT21 (ival)"))) + +(define_constraint "S25S0" "@internal + A 25-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT25 (ival)"))) + +(define_constraint "S10S0" "@internal + A 10-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT10 (ival)"))) + +(define_constraint "S07S0" "@internal + A 7-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT7 (ival)"))) + +(define_constraint "S13S0" "@internal + A 13-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT13 (ival)"))) + +(define_constraint "S08S0" "@internal + A 8-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT8 (ival)"))) + +(define_constraint "U10S0" "@internal + A 10-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT10 (ival)"))) + +(define_constraint "U08S0" "@internal + A 8-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT8 (ival)"))) + +(define_constraint "U09S0" "@internal + A 9-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT9 (ival)"))) + +(define_constraint "U12S0" "@internal + A 16-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT12 (ival)"))) + +(define_constraint "U16S0" "@internal + A 16-bit unsigned integer constant" + (and + (match_code "const_int") + (match_test "UNSIGNED_INT16 (ival)"))) + +(define_constraint "S16S0" "@internal + A 16-bit signed integer constant" + (and + (match_code "const_int") + (match_test "SIGNED_INT16 (ival)"))) + + +; TODO: Below this line definition should be corrected +(define_constraint "SR_R0" + "@internal + @code{R0} register." + (and (match_code "reg") + (match_test "REGNO (op) == R0_REGNUM"))) + +(define_constraint "SR_R1" + "@internal + @code{R1} register." 
+ (and (match_code "reg") + (match_test "REGNO (op) == R1_REGNUM"))) + +(define_constraint "SR_SP" + "@internal + @code{SP} register." + (and (match_code "reg") + (match_test "REGNO (op) == SP_REGNUM"))) + +; TODO: FIX THIS +(define_constraint "SR_GP" + "@internal + @code{GP} register." + (and (match_code "reg") + (match_test "REGNO (op) == R0_REGNUM"))) + +; TODO: FIX THIS +(define_constraint "SRPCL" + "@internal + @code{PCL} register." + (and (match_code "reg") + (match_test "REGNO (op) == R0_REGNUM"))) + +;; Memory constraint used for atomic ops. +(define_memory_constraint "ATOMC" + "A memory with only a base register" + (match_operand 0 "mem_noofs_operand")) + diff --git a/gcc/config/arc64/elf.h b/gcc/config/arc64/elf.h new file mode 100644 index 0000000000000..b5600fbd0c001 --- /dev/null +++ b/gcc/config/arc64/elf.h @@ -0,0 +1,38 @@ +/* Target macros for arc*-elf targets. + + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef DWARF2_UNWIND_INFO +#define DWARF2_UNWIND_INFO 0 + +/* If no specs file is enforced, default to nosys libarary. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "--start-group %G %{!specs=*:%{!nolibc:-lc -lnosys}} --end-group" + +/* Make sure we include the crtbegin.o. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt0%O%s crti%O%s crtbegin%O%s" + +/* ...and crtend.o. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef UNALIGNED_ACCESS_DEFAULT +#define UNALIGNED_ACCESS_DEFAULT 1 diff --git a/gcc/config/arc64/elf32.h b/gcc/config/arc64/elf32.h new file mode 100644 index 0000000000000..b2e63b2ef527b --- /dev/null +++ b/gcc/config/arc64/elf32.h @@ -0,0 +1,30 @@ +/* Target macros for arc32-elf targets. + + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef LINK_SPEC +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static:%{rdynamic:-export-dynamic}} \ + %{mcpu=hs6*:-m arc64elf64}" + +#undef ARC64_64BIT_DEFAULT +#define ARC64_64BIT_DEFAULT 0 diff --git a/gcc/config/arc64/elf64.h b/gcc/config/arc64/elf64.h new file mode 100644 index 0000000000000..cf3be8fdc0be1 --- /dev/null +++ b/gcc/config/arc64/elf64.h @@ -0,0 +1,30 @@ +/* Target macros for arc64-elf targets. + + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef LINK_SPEC +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static:%{rdynamic:-export-dynamic}} \ + %{mcpu=hs5*:-m arc64elf32}" + +#undef ARC64_64BIT_DEFAULT +#define ARC64_64BIT_DEFAULT 1 diff --git a/gcc/config/arc64/hs6x.md b/gcc/config/arc64/hs6x.md new file mode 100644 index 0000000000000..51410584aacf3 --- /dev/null +++ b/gcc/config/arc64/hs6x.md @@ -0,0 +1,92 @@ +;; DFA scheduling description of the Synopsys ARCv3 HS6x cpu +;; for GNU C compiler +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "HS6x") + +(define_cpu_unit "ALU0" "HS6x") +(define_cpu_unit "ALU1" "HS6x") +(define_cpu_unit "MPY32" "HS6x") +(define_cpu_unit "MPY64" "HS6x") +(define_cpu_unit "DIV" "HS6x") +(define_cpu_unit "DMP" "HS6x") +;;(define_cpu_unit "DMP_FPU" "HS6x") +;;(define_cpu_unit "SP_FPU" "HS6x") +;;(define_cpu_unit "DP_FPU" "HS6x") + +;; Instruction reservation for arithmetic instructions (pipe A, pipe B). 
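+;; Each define_insn_reservation below names an instruction class, gives
+;; its latency in cycles, selects instructions by their "type" attribute,
+;; and reserves the listed functional units.  For example, a plain ALU
+;; operation occupies either ALU pipe for one cycle and its result is
+;; assumed ready after three cycles.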
+(define_insn_reservation "alu_arith" 3 + (eq_attr "type" "abs, adcl, add, addhl, addl, and, andl, asl, asll, + asr, asrl, bclr, bic, bmsk, bset, bsetl, btst, + bxor, bxorl, cmp, ext, ffs, fls, lsr, lsrl, max, + maxl, min, minl, move, movecc, neg, nop, norm, + normh, norml, not, notl, or, orl, rol, ror, sbcl, + setcc, sex, sub, subl, swap, swape, swapel, swapl, + tst, vadd, vpack, vsub, xbfu, xor, xorl") + "(ALU0 | ALU1), nothing*2") + +(define_insn_reservation "jmp_insn" 1 + (eq_attr "type" "bbit, bi, bl, branch, branchcc, brcc, dbnz, jl, + jump, return, trap") + "ALU0") + +(define_insn_reservation "div_insn" 12 + (eq_attr "type" "div, divl, mod, modl, udiv, udivl, umod, umodl") + "ALU0 + DIV, DIV*11") + +(define_insn_reservation "mpy32_insn" 6 + (eq_attr "type" "dmpywh, mac, mpy, qmach, qmpyh, vmac2h, vmpy2h") + "ALU0 + MPY32 + MPY64, nothing*5") + +(define_insn_reservation "mpy64_insn" 9 + (eq_attr "type" "mpyl") + "ALU0 + MPY32 + MPY64, MPY64*3, nothing*5") + +(define_insn_reservation "load_insn" 3 + (eq_attr "type" "atldlop, atldop, ex, ld, llock") + "ALU1 + DMP, nothing*2") + +(define_insn_reservation "store_insn" 1 + (eq_attr "type" "scond, st") + "ALU1 + DMP") + +(define_insn_reservation "core" 1 + (eq_attr "type" "block, brk, dmb, flag, lr, sr, sync") + "ALU0 + ALU1 + DMP + MPY32 + MPY64 + DIV") + +;; Bypasses +(define_bypass 1 "load_insn" "alu_arith") +(define_bypass 1 "load_insn" "mpy*_insn") +(define_bypass 1 "load_insn" "store_insn" "store_data_bypass_p") +(define_bypass 2 "load_insn" "load_insn") +(define_bypass 1 "load_insn" "div_insn") + +;;(define_bypass 5 "mpy64_insn" "alu_arith") +(define_bypass 6 "mpy64_insn" "mpy*_insn") +(define_bypass 6 "mpy64_insn" "store_insn" "store_data_bypass_p") +(define_bypass 6 "mpy64_insn" "div_insn") + +(define_bypass 3 "mpy32_insn" "mpy*_insn") +(define_bypass 3 "mpy32_insn" "div_insn") + +(define_bypass 1 "alu_arith" "mpy*_insn" "!accumulator_bypass_p") +(define_bypass 1 "alu_arith" "div_insn") +(define_bypass 1 "alu_arith" "store_insn" "store_data_bypass_p") + +(define_bypass 1 "alu_arith" "alu_arith" "set_accumulator_p") diff --git a/gcc/config/arc64/linux.h b/gcc/config/arc64/linux.h new file mode 100644 index 0000000000000..9fad94ccfe763 --- /dev/null +++ b/gcc/config/arc64/linux.h @@ -0,0 +1,91 @@ +/* Target macros for arc*-*-linux targets. + + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Enable DWARF 2 exceptions. 
*/ +#undef DWARF2_UNWIND_INFO +#define DWARF2_UNWIND_INFO 1 + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS (); \ + } \ + while (0) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack + +/* We do not have any MULTILIB_OPTIONS specified, so there are no + MULTILIB_DEFAULTS. */ +#undef MULTILIB_DEFAULTS + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "\ + %{pthread:-D_REENTRANT} \ +" + +#if DEFAULT_LIBC == LIBC_GLIBC +/* Override linux.h LINK_EH_SPEC definition. + Signalize that because we have fde-glibc, we don't need all C shared libs + linked against -lgcc_s. */ +#undef LINK_EH_SPEC +#define LINK_EH_SPEC "--eh-frame-hdr " +#endif + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(beg, end) \ +{ \ + register unsigned long _beg __asm ("r0") = (unsigned long) (beg); \ + register unsigned long _end __asm ("r1") = (unsigned long) (end); \ + register unsigned long _xtr __asm ("r2") = 0; \ + register unsigned long _scno __asm ("r8") = 244; \ + __asm __volatile ("trap_s 0 ; sys_cache_sync" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_xtr), "r" (_scno)); \ +} + +/* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +#define PROFILE_HOOK(LABEL) \ + { \ + rtx fun, rt; \ + rt = get_hard_reg_initial_val (Pmode, BLINK_REGNUM); \ + fun = gen_rtx_SYMBOL_REF (Pmode, "_mcount"); \ + emit_library_call (fun, LCT_NORMAL, VOIDmode, rt, Pmode); \ + } + +#undef UNALIGNED_ACCESS_DEFAULT +#define UNALIGNED_ACCESS_DEFAULT 1 diff --git a/gcc/config/arc64/linux32.h b/gcc/config/arc64/linux32.h new file mode 100644 index 0000000000000..deeb6823afca9 --- /dev/null +++ b/gcc/config/arc64/linux32.h @@ -0,0 +1,37 @@ +/* Target macros for arc32-*-linux targets. + + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-arc32.so.2" + +/* Note that the default is to link against dynamic libraries, if they are + available. Override with -static. 
*/ +#undef LINK_SPEC +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ + %{mcpu=hs6*:-m arc64linux64} \ + -X " + +#undef ARC64_64BIT_DEFAULT +#define ARC64_64BIT_DEFAULT 0 diff --git a/gcc/config/arc64/linux64.h b/gcc/config/arc64/linux64.h new file mode 100644 index 0000000000000..05a67f3a6c59c --- /dev/null +++ b/gcc/config/arc64/linux64.h @@ -0,0 +1,37 @@ +/* Target macros for arc64-*-linux targets. + + Copyright (C) 2021 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-arc64.so.2" + +/* Note that the default is to link against dynamic libraries, if they are + available. Override with -static. */ +#undef LINK_SPEC +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \ + %{mcpu=hs5*:-m arc64linux32} \ + -X " + +#undef ARC64_64BIT_DEFAULT +#define ARC64_64BIT_DEFAULT 1 diff --git a/gcc/config/arc64/predicates.md b/gcc/config/arc64/predicates.md new file mode 100644 index 0000000000000..2d518f9cd795c --- /dev/null +++ b/gcc/config/arc64/predicates.md @@ -0,0 +1,270 @@ +;; Machine description for arc64 architecture. +;; Copyright (C) 2019 Free Software Foundation, Inc. +;; Contributed by Claudiu Zissulescu +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +;; Place holder for mov operand predicate +(define_predicate "arc64_movl_operand" + (and (match_code "unspec,reg, subreg, mem, const, const_int, symbol_ref, label_ref") + (ior (match_operand 0 "register_operand") + (and (ior (match_code "label_ref") + (match_code "symbol_ref")) + (match_test "arc64_allow_direct_access_p (op)")) + (match_operand 0 "memory_operand") + (and (match_code "unspec") + (ior (match_test "XINT (op,1) == ARC64_UNSPEC_PCREL") + (match_test "XINT (op,1) == ARC64_UNSPEC_TLS_GD"))) + (and (match_code "const_int") + (ior (match_test "UNSIGNED_INT32 (INTVAL (op))") + (match_test "SIGNED_INT32 (INTVAL (op))")))))) + +(define_predicate "arc64_movf_operand" + (and (match_code "reg, subreg, mem, const, const_double") + (ior (match_operand 0 "register_operand") + (match_operand 0 "memory_operand") + (and (match_code "const_double") + (ior (match_test "GET_MODE_SIZE (GET_MODE (op)) <= 4") + (match_test "op == CONST0_RTX (DFmode)")))))) + +;; A restricted version of the above, still accepting symbols and label refs. +(define_predicate "arc64_regsym_operand" + (ior (match_operand 0 "register_operand") + (and (ior (match_code "label_ref") + (match_code "symbol_ref")) + (match_test "arc64_allow_direct_access_p (op)")) + (and (match_code "const_int") + (ior (match_test "UNSIGNED_INT32 (INTVAL (op))") + (match_test "SIGNED_INT32 (INTVAL (op))"))))) + +(define_predicate "arc64_nonmem_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (ior (match_test "UNSIGNED_INT32 (INTVAL (op))") + (match_test "SIGNED_INT32 (INTVAL (op))"))))) + +(define_predicate "arc64_reg_or_unsig_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "UNSIGNED_INT32 (INTVAL (op))") + (match_test "INTVAL (op) > 0")))) + +;; Used for HIGH or LO_SUM patterns +(define_predicate "arc64_immediate_or_pic" + (ior (match_operand 0 "immediate_operand") + (match_code "unspec"))) + +;; Acceptable arguments for the call insn. +(define_predicate "arc64_call_insn_operand" + (ior (and (match_code "symbol_ref") + (match_test "!arc64_is_long_call_p (op)")) + (match_operand 0 "nonmemory_operand"))) + +; to be used by .f instructions +(define_special_predicate "cc_compare_operator" + (match_code "compare") + { + return GET_MODE (op) == CC_Cmode + || GET_MODE (op) == CC_Vmode; + }) + +; to be used for b{eq/ne}_s instructions. +(define_predicate "equality_comparison_operator" + (match_code "eq, ne") + { + machine_mode opmode = GET_MODE (XEXP (op, 0)); + return (opmode != CC_Vmode); + }) + + +; to be used for b{eq/ne/...}_s instructions. 
+(define_predicate "ccmode_comparison_operator" + (match_code "eq, ne, gt, ge, lt, le, gtu, geu, ltu, leu, + unlt, unle, unge, ungt") + { + enum rtx_code code = GET_CODE (op); + + switch (GET_MODE (XEXP (op, 0))) + { + case E_CC_FPUmode: + case E_CC_FPUEmode: + case E_CCmode: + return 1; + + case E_CC_ZNmode: + return (code == EQ || code == NE); + + default: + return 0; + } + }) + +(define_predicate "core_register_operand" + (match_code "reg,subreg") + { + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return (REG_P (op) + && (REGNO (op) <= BLINK_REGNUM + || (REGNO (op)) >= FIRST_PSEUDO_REGISTER)); + }) + + +;; True for integer comparisons and for FP comparisons other then LTGT or UNEQ +(define_special_predicate "arc64_comparison_operator" + (match_code "eq, ne, le, lt, ge, gt, geu, gtu, leu, ltu, unordered, + ordered, unlt, unle, unge, ungt")) + +(define_special_predicate "cc_register" + (match_code "reg") +{ + if (mode == VOIDmode) + { + mode = GET_MODE (op); + if (GET_MODE_CLASS (mode) != MODE_CC) + return FALSE; + } + + if (mode == GET_MODE (op) && GET_CODE (op) == REG && REGNO (op) == CC_REGNUM) + return TRUE; + + return FALSE; +}) + +;; Return TRUE if OP is a shimm 6bit value +(define_predicate "S06S0_immediate_operand" + (and (match_code "const_int") + (match_test "SIGNED_INT6 (INTVAL (op))")) +) + +(define_predicate "vectdup_immediate_operand" + (and (match_code "const_int") + (ior (match_test "SIGNED_INT6 (INTVAL (op))") + (match_test "UNSIGNED_INT12 (INTVAL (op))"))) +) + +;; Return true if OP is a MEM that when used as a load or store address will +;; require an 8 byte insn. +;; Load and store instructions don't allow the same possibilities but they're +;; similar enough that this one function will do. +;; This is currently only used when calculating length attributes. */ +(define_predicate "limm_ldst_operand" + (and (match_code "mem") + (match_test "arc64_limm_addr_p (op)"))) + +;; Allows only 1,2,3 values. It is used with add/sub shifted operations. +(define_predicate "_1_2_3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 2 || INTVAL (op) == 3")) +) + +;; Allows only 2,4,8 values. It is used with add/sub shifted operations. +(define_predicate "_2_4_8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8")) +) + +;; Return TRUE if OP can be used as a destination for any move +;; (mov,st,ld) instruction. +(define_predicate "arc64_dest_operand" + (match_code "reg, subreg, mem") + { + if (MEM_P (op)) + return arc64_legitimate_store_address_p (mode, XEXP (op, 0)); + return nonimmediate_operand (op, mode); + }) + +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) + +;; Used by vector floating point instructions. +(define_predicate "arc64_fsimd_register" + (match_code "reg,subreg") + { + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) + return 1; + + /* Check if it is a register. */ + if (!REG_P (op)) + return 0; + + /* FIXME! check: REGNO_REG_CLASS (REGNO (op)) != FP_REGS */ + + /* Return true/false depending on the SIMD length. */ + switch (mode) + { + /* All vector modes equal with the size of a fp-register. */ + case E_V2SFmode: + case E_V4HFmode: + case E_V2HFmode: + return 1; + + /* All vector modes double the size of a fp-register. 
*/ + case E_V8HFmode: + case E_V4SFmode: + case E_V2DFmode: + return (ARC64_VFP_128 && ((REGNO (op) & 0x01) == 0)); + + default: + gcc_unreachable (); + } + }) + +(define_predicate "arc64_fsimd_moperand" + (ior (match_operand 0 "arc64_fsimd_register") + (match_operand 0 "memory_operand"))) + +(define_predicate "short_immediate_operand" + (and (match_code "const_int") + (match_test "SIGNED_INT16 (INTVAL (op))"))) + +(define_predicate "unsign_immediate_operand" + (and (match_code "const_int") + (match_test "UNSIGNED_INT16 (INTVAL (op))"))) + +(define_predicate "usigned32b_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) > 0") + (match_test "UNSIGNED_INT32 (INTVAL (op))"))) + +(define_predicate "signed32b_operand" + (and (match_code "const_int") + (match_test "SIGNED_INT32 (INTVAL (op))"))) + +(define_predicate "bbitimm_operand" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (INTVAL (op))"))) + +(define_special_predicate "brcc_comparison_operator" + (match_code "eq, ne, lt, ge, ltu, geu")) + +(define_special_predicate "ebrcc_comparison_operator" + (match_code "gt, gtu, le, leu")) + +;; Return true if the symbol requires a @plt34 reloc +(define_predicate "plt34_symbol_p" + (and (match_code "symbol_ref") + (match_test "arc64_use_plt34_p (op)"))) + +;; Return true if OP a (const_int 0) operand. +(define_predicate "const0_operand" + (and (match_code "const_int") + (match_test "op == CONST0_RTX (mode)"))) diff --git a/gcc/config/arc64/t-arc64 b/gcc/config/arc64/t-arc64 new file mode 100644 index 0000000000000..ded54fda4a419 --- /dev/null +++ b/gcc/config/arc64/t-arc64 @@ -0,0 +1,31 @@ +# GCC Makefile fragment for Synopsys ARC64. +# +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# . + +TM_H += $(srcdir)/config/arc64/arc64-c.def + +arc64-c.o: $(srcdir)/config/arc64/arc64-c.cc $(CONFIG_H) $(SYSTEM_H) \ +$(TREE_H) $(TM_H) $(TM_P_H) coretypes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arc64/arc64-c.cc + +PASSES_EXTRA += $(srcdir)/config/arc64/arc64-passes.def + +# Local Variables: +# mode: Makefile +# End: diff --git a/gcc/config/arc64/t-multilib b/gcc/config/arc64/t-multilib new file mode 100644 index 0000000000000..efae736b74947 --- /dev/null +++ b/gcc/config/arc64/t-multilib @@ -0,0 +1,27 @@ +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = mfpu=fpus/mfpu=fpud/m128/mcpu=hs5x/mcpu=hs58 + +MULTILIB_DIRNAMES = fpus fpud m128 hs5x hs58 + +# Using m128 will default to standard implementation of memcpy/memset +MULTILIB_REUSE = mfpu.fpud=mfpu.fpud/m128 +MULTILIB_REUSE += mfpu.fpus=mfpu.fpus/m128 + +MULTILIB_MATCHES = m128=mcpu?hs68 diff --git a/gcc/config/arc64/t-multilib32 b/gcc/config/arc64/t-multilib32 new file mode 100644 index 0000000000000..93bc328a2f114 --- /dev/null +++ b/gcc/config/arc64/t-multilib32 @@ -0,0 +1,21 @@ +# Copyright (C) 2021 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = mcpu=hs6x + +MULTILIB_DIRNAMES = hs6x diff --git a/gcc/configure b/gcc/configure index 38bec5c80df4a..29c4121e4a73b 100755 --- a/gcc/configure +++ b/gcc/configure @@ -25435,10 +25435,16 @@ foo: .long 25 lda $4,foo($29) !tprel' tls_as_opt=--fatal-warnings ;; - arc*-*-*) + arc-*-*|arc[eb]*-*-*) conftest_s=' add_s r0,r0, @foo@tpoff' ;; + arc64-*-*) + conftest_s=' + addl r0,r0, @foo@tpoff' + tls_first_major=2 + tls_first_minor=23 + ;; cris-*-*|crisv32-*-*) conftest_s=' .section ".tdata","awT",@progbits @@ -29448,9 +29454,9 @@ esac # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. case "$cpu_type" in - aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | loongarch | m32c \ - | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ - | tilegx | tilepro | visium | xstormy16 | xtensa) +aarch64 | alpha | arc | arc64 | arm | avr | bfin | cris | csky | i386 | loongarch \ + | m32c | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score \ + | sparc | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; ia64 | s390) diff --git a/gcc/configure.ac b/gcc/configure.ac index 23bee7010a37b..b2470a01c506b 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -3703,10 +3703,16 @@ foo: .long 25 lda $4,foo($29) !tprel' tls_as_opt=--fatal-warnings ;; - arc*-*-*) + arc-*-*|arc[eb]*-*-*) conftest_s=' add_s r0,r0, @foo@tpoff' ;; + arc64-*-*) + conftest_s=' + addl r0,r0, @foo@tpoff' + tls_first_major=2 + tls_first_minor=23 + ;; cris-*-*|crisv32-*-*) conftest_s=' .section ".tdata","awT",@progbits @@ -5519,9 +5525,9 @@ esac # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. 
case "$cpu_type" in - aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | loongarch | m32c \ - | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ - | tilegx | tilepro | visium | xstormy16 | xtensa) + aarch64 | alpha | arc | arc64 | arm | avr | bfin | cris | csky | i386 | loongarch \ + | m32c | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score \ + | sparc | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; ia64 | s390) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ff6c338bedb69..8bd7150b55c8e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -786,6 +786,11 @@ Objective-C and Objective-C++ Dialects}. -munalign-prob-threshold=@var{probability} -mmpy-option=@var{multo} @gol -mdiv-rem -mcode-density -mll64 -mfpu=@var{fpu} -mrf16 -mbranch-index} +@emph{ARC64 Options} +@gccoptlist{-mcmodel=@var{code-model} -matomic=@var{opt} -mfpu=@var{fpu} @gol +-mdiv-rem -mbitscan -mcpu=@var{cpu} -munaligned-access -mvolatile-di @gol +-mcode-density -msimd -mwide -m128 -mbrcc} + @emph{ARM Options} @gccoptlist{-mapcs-frame -mno-apcs-frame @gol -mabi=@var{name} @gol @@ -18911,6 +18916,7 @@ platform. * Adapteva Epiphany Options:: * AMD GCN Options:: * ARC Options:: +* ARC64 Options:: * ARM Options:: * AVR Options:: * Blackfin Options:: @@ -20464,6 +20470,82 @@ Replaced by @option{-mmultcost}. @end table +@node ARC64 Options +@subsection ARC64 Options +@cindex ARC64 options + +These @samp{-m} options are defined for the ARC64 port: + +@table @gcctabopt +@item -mcmodel=@var{code-model} +@opindex mcmodel +Select the desired code model to be generated. The possible values are +@samp{small} for programs and its statically defined symbols which +should be within 1MB of each other. @samp{medium} is the default model +suited for programs and symbols within 4GB of each other. And +@samp{large} which makes no assumptions about addresses and sizes of +sections. + +@item -matomic=@var{opt} +@opindex matomic +Enable specific Atomic Memory Operations (AMO). The argument @var{opt} +takes the value @samp{0}, @samp{1}, or @samp{2} to assign no, legacy +@samp{ARCv2}, and new @samp{ARCv3} AMOs. + +@item -mfpu=@var{fpu} +@opindex mfpu +Enable support for ARCv3 floating point vector unit. Possible +arguments are @samp{none}, @samp{fpus}, or @samp{fpud} to disable the +unit (default), using only simple precision floating point unit, and +use double precision floating point uint. + +@item -mdiv-rem +@opindex mdiv-rem +Enable 32bit or 64bit div/rem integer instructions. This option is +enabled by default. + +@item -mbitscan +@opindex mbitscan +Enable @code{NORM}, @code{NORMH}, @code{FFS}, @code{FLS}, +@code{NORML}, @code{FFSL}, and @code{FLSL} bitscan instructions. This +option is enabled by default. + +@item -mcpu=@var{cpu} +@opindex mcpu +Compile for specific ARCv3 CPU. Possible value is @samp{hs6x} which is +also the default CPU. + +@item -munaligned-access +@opindex munaligned-access +Enable unaligned access for packed data. + +@item -mvolatile-di +@opindex mvolatile-di +Enable uncached access for volatile memories. + +@item -mcode-density +@opindex mcode-density +Enable code-density instructions. + +@item -msimd +@opindex msimd +Enable integer SIMD extension instructions. + +@item -mwide +@opindex mwide +Enable 128 bit wide floating point SIMD vector. + +@item -m128 +@opindex m128 +Enable 128bit load/store instructions. This option is enabled by +default. + +@item -mbrcc +@opindex brcc +Enable BRcc instructions in RTL combiner. 
+ +@end table + @node ARM Options @subsection ARM Options @cindex ARM options diff --git a/gcc/testsuite/gcc.c-torture/compile/20000804-1.c b/gcc/testsuite/gcc.c-torture/compile/20000804-1.c index 95bb0fafd7044..2af2311cbad79 100644 --- a/gcc/testsuite/gcc.c-torture/compile/20000804-1.c +++ b/gcc/testsuite/gcc.c-torture/compile/20000804-1.c @@ -4,6 +4,7 @@ /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && { ia32 && { ! nonpic } } } } */ /* { dg-skip-if "No 64-bit registers" { m32c-*-* } } */ /* { dg-skip-if "Not enough 64-bit registers" { pdp11-*-* } { "-O0" } { "" } } */ +/* { dg-skip-if "No 64-bit registers" { arc32-*-* } } */ /* { dg-xfail-if "Inconsistent constraint on asm" { csky-*-* } { "-O0" } { "" } } */ /* { dg-xfail-if "Inconsistent constraint on asm" { bpf-*-* } { "-O0" } { "" } } */ /* { dg-xfail-if "" { h8300-*-* } } */ diff --git a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c index d2c0f9ab7ece3..a17ba8d2160e3 100644 --- a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c @@ -63,7 +63,7 @@ /* By default we emit a sequence with DIVU, which "never signals an exceptional condition, even when dividing by zero". */ # define DO_TEST 0 -#elif defined (__arc__) +#elif defined (__arc__) || defined (__ARC64__) /* No SIGFPE for ARC integer division. */ # define DO_TEST 0 #elif defined (__arm__) && defined (__ARM_EABI__) diff --git a/gcc/testsuite/gcc.dg/builtin-apply2.c b/gcc/testsuite/gcc.dg/builtin-apply2.c index 0f350f4ac16a2..69835bc299a71 100644 --- a/gcc/testsuite/gcc.dg/builtin-apply2.c +++ b/gcc/testsuite/gcc.dg/builtin-apply2.c @@ -1,7 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target untyped_assembly } */ /* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { "avr-*-* nds32*-*-* amdgcn-*-*" } } */ -/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs." { "csky*-*-* riscv*-*-* or1k*-*-* msp430-*-* pru-*-* loongarch*-*-*" } } */ +/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs." { "arc64*-*-* csky*-*-* riscv*-*-* or1k*-*-* msp430-*-* pru-*-* loongarch*-*-*" } } */ /* { dg-skip-if "Variadic funcs use Base AAPCS. Normal funcs use VFP variant." { arm*-*-* && arm_hf_eabi } } */ /* PR target/12503 */ diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c index 21cce0f440c53..9bf6552f9b811 100644 --- a/gcc/testsuite/gcc.dg/stack-usage-1.c +++ b/gcc/testsuite/gcc.dg/stack-usage-1.c @@ -13,6 +13,10 @@ # define SIZE 256 /* No frame pointer for leaf functions (default) */ #elif defined(__arc__) # define SIZE (256-4) +#elif defined(__ARC64_ARCH32__) +# define SIZE (256-4) /* Frame pointer is saved onto the stack. */ +#elif defined(__ARC64_ARCH64__) +# define SIZE (256-8) /* Frame pointer is saved onto the stack. */ #elif defined(__i386__) # define SIZE 248 #elif defined(__x86_64__) diff --git a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c index 552ca1433f4f1..e2190101bf837 100644 --- a/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c +++ b/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c @@ -9,7 +9,7 @@ /* arm_hf_eabi: Variadic funcs use Base AAPCS. Normal funcs use VFP variant. avr: Variadic funcs don't pass arguments in registers, while normal funcs do. 
*/ -/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { csky*-*-* avr-*-* riscv*-*-* or1k*-*-* msp430-*-* amdgcn-*-* pru-*-* loongarch*-*-* } } } */ +/* { dg-skip-if "Variadic funcs use different argument passing from normal funcs" { arm_hf_eabi || { arc64-*-* csky*-*-* avr-*-* riscv*-*-* or1k*-*-* msp430-*-* amdgcn-*-* pru-*-* loongarch*-*-* } } } */ /* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { nds32*-*-* } { v850*-*-* } } */ /* { dg-require-effective-target untyped_assembly } */ diff --git a/gcc/testsuite/gcc.dg/torture/vshuf-4.inc b/gcc/testsuite/gcc.dg/torture/vshuf-4.inc index fb35df809c890..2b846c2bdf9f2 100644 --- a/gcc/testsuite/gcc.dg/torture/vshuf-4.inc +++ b/gcc/testsuite/gcc.dg/torture/vshuf-4.inc @@ -139,4 +139,18 @@ T (223, 0, 5, 3, 4) \ T (224, 2, 3, 1, 4) \ T (225, 2, 3, 5, 1) \ T (226, 4, 3, 1, 0) \ -T (227, 2, 3, 5, 5) +T (227, 2, 3, 5, 5) \ +T (228, 1, 1, 1, 1) \ +T (229, 2, 2, 2, 2) \ +T (230, 3, 3, 3, 3) \ +T (231, 0, 2, 0, 2) \ +T (232, 0, 2, 4, 6) \ +T (233, 1, 3, 1, 3) \ +T (234, 1, 3, 5, 7) \ +T (235, 4, 6, 0, 2) \ +T (236, 5, 7, 1, 3) \ +T (237, 1, 0, 3, 2) \ +T (238, 0, 1, 0, 1) \ +T (239, 2, 3, 2, 3) \ +T (240, 2, 3, 6, 7) \ + diff --git a/gcc/testsuite/gcc.target/arc/arc.exp b/gcc/testsuite/gcc.target/arc/arc.exp index f61a6f548ecf8..45723edc0ca3f 100644 --- a/gcc/testsuite/gcc.target/arc/arc.exp +++ b/gcc/testsuite/gcc.target/arc/arc.exp @@ -17,7 +17,7 @@ # GCC testsuite that uses the `dg.exp' driver. # Exit immediately if this isn't an arc target. -if ![istarget arc*-*-*] then { +if { ![istarget arc-*-*] && ![istarget arceb-*-*] } then { return } diff --git a/gcc/testsuite/gcc.target/arc64/addsubx-1.c b/gcc/testsuite/gcc.target/arc64/addsubx-1.c new file mode 100644 index 0000000000000..b5b7f655b492d --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/addsubx-1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* Checks if the compiler generates shifted adds and subs. */ + +int foo (int a, int b) +{ + return a + (b << 1); +} + +long int bar (long int a, long int b) +{ + return a + (b << 1); +} + +int minus1 (int a, int b) +{ + return a - (b << 1); +} + +long int minus1l (long int a, long int b) +{ + return a - (b << 1); +} + +int plus1 (int a, int b) +{ + int x = a + (b << 3); + if (x != 0) + return x; + return 20; +} + +/* { dg-final { scan-assembler "add1_s\\s+" } } */ +/* { dg-final { scan-assembler "add1l\\s+" { target { hs6x } } } } */ +/* { dg-final { scan-assembler "sub1\\s+" } } */ +/* { dg-final { scan-assembler "sub1l\\s+" { target { hs6x } } } } */ +/* { dg-final { scan-assembler "add3\\.f\\s+" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/arc64.exp b/gcc/testsuite/gcc.target/arc64/arc64.exp new file mode 100644 index 0000000000000..c6eb6cad0d9cc --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/arc64.exp @@ -0,0 +1,78 @@ +# Copyright (C) 2007-2019 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# .
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an arc target.
+if ![istarget arc*-*-*] then {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Return 1 if this is a compiler supporting ARCv3/32 as default processor
+proc check_effective_target_hs5x { } {
+ return [check_no_compiler_messages hs5x assembly {
+ #if !defined(__ARC64_ARCH32__)
+ #error No ARCv3/32
+ #endif
+ }]
+}
+
+# Return 1 if this is a compiler supporting ARCv3/64 as default processor
+proc check_effective_target_hs6x { } {
+ return [check_no_compiler_messages hs6x assembly {
+ #if !defined(__ARC64_ARCH64__)
+ #error No ARCv3/64
+ #endif
+ }]
+}
+
+# Return 1 if double loads/stores are enabled for the target
+proc check_effective_target_doubleaccess { } {
+ return [check_no_compiler_messages doubleaccess assembly {
+ #if !defined(__ARC64_LL64__) && !defined (__ARC64_M128__)
+ #error No double loads or stores detected
+ #endif
+ }]
+}
+
+
+# Return 1 if DP FP is enabled for the target
+proc check_effective_target_dpfp { } {
+ return [check_no_compiler_messages dpfp assembly {
+ #if !defined(__ARC_FPU_DP__)
+ #error No double precision floating point detected
+ #endif
+ }]
+}
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.{\[cS\],cpp}]] \
+ "" $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/arc64/cancel-1.c b/gcc/testsuite/gcc.target/arc64/cancel-1.c new file mode 100644 index 0000000000000..e050c538157dc --- /dev/null
+++ b/gcc/testsuite/gcc.target/arc64/cancel-1.c @@ -0,0 +1,31 @@
+/* Test for cleanups with pthread_cancel. Any issue with libgcc's unwinder
+ will cause this test to spin in pthread_join. */
+
+/* { dg-do run } */
+/* { dg-require-effective-target pthread } */
+/* { dg-options "-pthread" } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+void *thread_loop (void *)
+{
+ while (1)
+ {
+ printf("worker: loop\n");
+ sleep(1);
+ }
+}
+
+int main ()
+{
+ pthread_t thread;
+
+ pthread_create (&thread, 0, thread_loop, 0);
+ sleep(5);
+ pthread_cancel (thread);
+ pthread_join (thread, 0);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arc64/cmodel-1.c b/gcc/testsuite/gcc.target/arc64/cmodel-1.c new file mode 100644 index 0000000000000..2ba3b9dec6710 --- /dev/null
+++ b/gcc/testsuite/gcc.target/arc64/cmodel-1.c @@ -0,0 +1,14 @@
+/* Check if the call is made using JL instruction. */
+/* { dg-do compile } */
+/* { dg-options "-O -mcmodel=large" } */
+
+extern int long_call(int a);
+
+int test (int a)
+{
+ return 3 * long_call(a + 1);
+}
+
+/* { dg-final { scan-assembler "movhl" { target { hs6x } } } } */
+/* { dg-final { scan-assembler "orl" { target { hs6x } } } } */
+/* { dg-final { scan-assembler "jl_s.*\[r\d+\]" } } */
diff --git a/gcc/testsuite/gcc.target/arc64/interrupt-1.c b/gcc/testsuite/gcc.target/arc64/interrupt-1.c new file mode 100644 index 0000000000000..c229230cc5996 --- /dev/null
+++ b/gcc/testsuite/gcc.target/arc64/interrupt-1.c @@ -0,0 +1,8 @@
+/* Verify the return instruction is rtie.
*/ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt ("ilink"))) +foo (void) +{ +} +/* { dg-final { scan-assembler "rtie" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/interrupt-2.c b/gcc/testsuite/gcc.target/arc64/interrupt-2.c new file mode 100644 index 0000000000000..de3494108d9a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/interrupt-2.c @@ -0,0 +1,17 @@ +/* Verify that arg regs used as temporaries get saved. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +void __attribute__ ((interrupt)) +foo2 (void) +{ + extern volatile int INTERRUPT_FLAG; + INTERRUPT_FLAG = 0; + + extern volatile int COUNTER; + COUNTER++; +} +/* { dg-final { scan-assembler "push.*r0" { target { hs6x } } } } */ +/* { dg-final { scan-assembler "pop.*r0" { target { hs6x } } } } */ + +/* { dg-final { scan-assembler "st\.a.*r0" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "ld\.ab.*r0" { target { hs5x } } } } */ diff --git a/gcc/testsuite/gcc.target/arc64/interrupt-3.c b/gcc/testsuite/gcc.target/arc64/interrupt-3.c new file mode 100644 index 0000000000000..f5084de008ee7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/interrupt-3.c @@ -0,0 +1,9 @@ +void __attribute__ ((interrupt("you load too"))) +handler1 (void) +{ /* { dg-warning "is not \"ilink" } */ +} + +void __attribute__ ((interrupt(42))) +hander2 (void) +{ /* { dg-warning "is not a string constant" } */ +} diff --git a/gcc/testsuite/gcc.target/arc64/interrupt-4.c b/gcc/testsuite/gcc.target/arc64/interrupt-4.c new file mode 100644 index 0000000000000..21786a838d31e --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/interrupt-4.c @@ -0,0 +1,9 @@ +extern int gpio_int; +extern int read_reg (int); + +void __attribute__ ((interrupt)) +isr_handler (void) +{ + gpio_int = read_reg (1); +} +/* { dg-final { scan-assembler-times "\\sblink" 2 } } */ diff --git a/gcc/testsuite/gcc.target/arc64/interrupt-5.c b/gcc/testsuite/gcc.target/arc64/interrupt-5.c new file mode 100644 index 0000000000000..66fdb36b7d3cc --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/interrupt-5.c @@ -0,0 +1,128 @@ +/* { dg-options "-O" } */ +extern void will_trig_exception(void); + +__attribute__ ((interrupt)) +void isr_0 (void) +{ + will_trig_exception(); +} + +/* 0. There shouldn't be any need to (re)adjust the stack pointer. */ +/* { dg-final { scan-assembler-not "\\sadd.*sp" } } */ +/* { dg-final { scan-assembler-not "\\ssub.*sp" } } */ + +/* 1. hs6x output without double loads and stores. */ +/* { dg-final { scan-assembler "pushl_s\\s+r58\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r30\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r13\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r12\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r11\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r10\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r9\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r8\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r7\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r6\n" { target { hs6x && { ! 
doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r5\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r4\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r3\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r2\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r1\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r0\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+blink\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+blink\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r0\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r1\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r2\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r3\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r4\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r5\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r6\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r7\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r8\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r9\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r10\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r11\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r12\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r13\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r30\n" { target { hs6x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r58\n" { target { hs6x && { ! doubleaccess } } } } } */ + +/* 2. hs6x output with double loads and stores. 
*/ +/* { dg-final { scan-assembler "pushl_s\\s+r58\n" { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+r30\n" { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r12," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r10," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r8," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r6," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r4," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r2," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "stdl.a\\s+r0," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "pushl_s\\s+blink\n" { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+blink\n" { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r0," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r2," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r4," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r6," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r8," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r10," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "lddl.ab\\s+r12," { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r30\n" { target { hs6x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "popl_s\\s+r58\n" { target { hs6x && doubleaccess } } } } */ + +/* 3. hs5x output without double loads and stores. */ +/* { dg-final { scan-assembler "st.a\\s+r58," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r30," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r13," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r12," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r11," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r10," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r9," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r8," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r7," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r6," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r5," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r4," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r3," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r2," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r1," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r0," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "st.a\\s+blink," { target { hs5x && { ! 
doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+blink," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r0," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r1," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r2," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r3," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r4," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r5," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r6," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r7," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r8," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r9," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r10," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r11," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r12," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r13," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r30," { target { hs5x && { ! doubleaccess } } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r58," { target { hs5x && { ! doubleaccess } } } } } */ + +/* 4. hs5x output with double loads and stores. */ +/* { dg-final { scan-assembler "st.a\\s+r58," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "st.a\\s+r30," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r12," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r10," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r8," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r6," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r4," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r2," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "std.a\\s+r0," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "st.a\\s+blink," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+blink," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r0," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r2," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r4," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r6," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r8," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r10," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ldd.ab\\s+r12," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r30," { target { hs5x && doubleaccess } } } } */ +/* { dg-final { scan-assembler "ld.ab\\s+r58," { target { hs5x && doubleaccess } } } } */ diff --git 
a/gcc/testsuite/gcc.target/arc64/movd-1.c b/gcc/testsuite/gcc.target/arc64/movd-1.c new file mode 100644 index 0000000000000..ba9920bd2f8ed --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* "movdi" is about assigning 32-bit of data (byte). * + * * + * This is one of the tests in series of testing for * + * "movdi" support. The test scenarios are as the * + * following table: * + * * + * ,--------.----------.----------.-----------------. * + * | test | notation | dest. | source | * + * |--------|---------------------------------------| * + * | movd-1 | r <- i | register | immediate | * + * | movd-2 | r <- r | | register | * + * | movd-3 | r <- m | | memory | * + * |--------+----------+----------+-----------------| * + * | movd-4 | m <- r | | register | * + * | movd-5 | m <- i | memroy | immediate small | * + * | movd-6 | m <- I | | immediate big | * + * | movd-7 | m <- m | | memory | * + * `--------^----------^----------^-----------------' */ + +/* assign immediate to register */ +volatile int dummy; +void foo(void) +{ + volatile register int dst; + dst = 0x12344321; + dst = 0x40000000; + dst = 0x7FFFFFFF; /* largest positive number in 32-bit */ + dst = -2147483648; /* smallest negative number in 32-bit */ + dst = 0xFFFFFFFF; /* -1 */ + dummy = dst; +} +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,305414945" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,1073741824" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,2147483647" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,-2147483648" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movd-2.c b/gcc/testsuite/gcc.target/arc64/movd-2.c new file mode 100644 index 0000000000000..43e786d3c0998 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* check "movd-1.c" for further details. */ + +/* assign a register to register */ +int foo(int a, int b) +{ + return b; +} +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,r\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movd-3.c b/gcc/testsuite/gcc.target/arc64/movd-3.c new file mode 100644 index 0000000000000..50b45e302647f --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movd-1.c" for further details. */ + +/* assign memory to register */ +volatile int mem; +void foo(void) +{ + register int dst = mem; +} +/* { dg-final { scan-assembler "ld\[_s\\s\]+r\[0-9\]+,\\\[" } } */ + diff --git a/gcc/testsuite/gcc.target/arc64/movd-4.c b/gcc/testsuite/gcc.target/arc64/movd-4.c new file mode 100644 index 0000000000000..07609d7e6ee78 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movd-1.c" for further details. */ + +/* register to memory */ +int mem; +void foo(void) +{ + register int reg_int; + mem = reg_int; +} +/* { dg-final { scan-assembler "st\[_s\\s\]+r\[0-9\]+,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movd-5.c b/gcc/testsuite/gcc.target/arc64/movd-5.c new file mode 100644 index 0000000000000..eed0f346e2970 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movd-1.c" for further details. 
*/ + +/* assign immediate to a memory: this immediate is small * + * enough to be covered by w6 (signed 6 bit number). */ +int mem; +void foo(void) +{ + mem = 0x00; /* the usual suspect: 0 */ + mem = 31; /* largest positive number in w6 */ + mem = -32; /* smallest negative number in w6 */ + mem = -1; /* just a -1 (all bits 1) */ +} +/* { dg-final { scan-assembler "st\\s+0,\\\[" } } */ +/* { dg-final { scan-assembler "st\\s+31,\\\[" } } */ +/* { dg-final { scan-assembler "st\\s+-32,\\\[" } } */ +/* { dg-final { scan-assembler "st\\s+-1,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movd-6.c b/gcc/testsuite/gcc.target/arc64/movd-6.c new file mode 100644 index 0000000000000..d5f9b58507b76 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-6.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movd-1.c" for further details. */ + +/* assign immediate to a memory: this immediate cannot be * + * represented by 6-bit, hence stb w6, mem is not an option */ +int mem; +void foo(void) +{ + mem = 0x40; /* smallest 7-bit number */ + mem = 0x7F; /* largest 7-bit number */ + mem = 0x80; /* 128 */ + mem = -128; /* smallest 8-bit number */ + mem = -33; /* first negative number outside w6's range. else, + it would use something like 'sth -32, [@mem]' */ +} +/* { dg-final { scan-assembler "mov_s\\s+r\\d,64" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,127" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,128" } } */ +/* { dg-final { scan-assembler "mov\\s+r\\d,-128" } } */ +/* { dg-final { scan-assembler "mov\\s+r\\d,-33" } } */ +/* { dg-final { scan-assembler-times "st\[_s\\s\]+r\\d,\\\[" 5 } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movd-7.c b/gcc/testsuite/gcc.target/arc64/movd-7.c new file mode 100644 index 0000000000000..0836cc5660527 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movd-7.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fomit-frame-pointer" } */ + +/* check "movd-1.c" for further details. */ + +/* assign memory to a memory */ +int mem_dst, mem_src; +void foo(void) +{ + mem_dst = mem_src; +} +/* { dg-final { scan-assembler "ld.x\\s+r\[0-9\]+,\\\[" { target hs6x } } } */ +/* { dg-final { scan-assembler-not "ext\[bhw\]\\s+" } } */ +/* { dg-final { scan-assembler "st\[_s\\s\]+r\\d,\\\[" } } */ + +/* { dg-final { scan-assembler "ld\\s+r\[0-9\]+,\\\[" { target hs5x } } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-1.c b/gcc/testsuite/gcc.target/arc64/movh-1.c new file mode 100644 index 0000000000000..4667f357af7b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-1.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* "movhi" is about assigning 16-bit of data (byte). * + * * + * This is one of the tests in series of testing for * + * "movhi" support. The test scenarios are as the * + * following table: * + * * + * ,--------.----------.----------.-----------------. * + * | test | notation | dest. | source | * + * |--------|---------------------------------------| * + * | movh-1 | r <- i | register | immediate | * + * | movh-2 | r <- r | | register | * + * | movh-3 | r <- m | | memory | * + * |--------+----------+----------+-----------------| * + * | movh-4 | m <- r | | register | * + * | movh-5 | m <- i | memroy | immediate small | * + * | movh-6 | m <- I | | immediate big | * + * | movh-7 | m <- m | | memory | * + * | movh-8 | m <- m | volatile causes sex, why? 
| * + * `--------^----------^----------^-----------------' */ + +/* assign immediate to register */ +volatile short dummy; +void foo(void) +{ + volatile register short dst; + dst = 0x1234; + dst = 0x4000; + dst = 0x7FFF; /* largest positive number in short */ + dst = -32768; /* smallest negative number in short */ + dst = 0xFFFF; /* -1 */ + dummy = dst; +} +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,4660" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,16384" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,32767" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\[0-9\]+,-32768" } } */ +/* { dg-final { scan-assembler "sth\.as\\s+-1,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-2.c b/gcc/testsuite/gcc.target/arc64/movh-2.c new file mode 100644 index 0000000000000..b0780523a2251 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* check "movh-1.c" for further details. */ + +/* assign a register to register */ +short foo(short a, short b) +{ + return b; +} +/* { dg-final { scan-assembler "sexh_s\\s+r\[0-9\]+,r\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-3.c b/gcc/testsuite/gcc.target/arc64/movh-3.c new file mode 100644 index 0000000000000..af17466c18153 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movh-1.c" for further details. */ + +/* assign memory to register */ +volatile short mem; +void foo(void) +{ + register short dst = mem; +} +/* { dg-final { scan-assembler "ldh\[_s\\s\]+r\[0-9\]+,\\\[" } } */ + diff --git a/gcc/testsuite/gcc.target/arc64/movh-4.c b/gcc/testsuite/gcc.target/arc64/movh-4.c new file mode 100644 index 0000000000000..c87f762c92692 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movh-1.c" for further details. */ + +/* register to memory */ +short mem; +void foo(void) +{ + register short reg_short; + mem = reg_short; +} +/* { dg-final { scan-assembler "sth\[_s\\s\]+r\[0-9\]+,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-5.c b/gcc/testsuite/gcc.target/arc64/movh-5.c new file mode 100644 index 0000000000000..25c64ae3c19fa --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movh-1.c" for further details. */ + +/* assign immediate to a memory: this immediate is small * + * enough to be covered by w6 (signed 6 bit number). */ +short mem; +void foo(void) +{ + mem = 0x00; /* the usual suspect: 0 */ + mem = 31; /* largest positive number in w6 */ + mem = -32; /* smallest negative number in w6 */ + mem = -1; /* just a -1 (all bits 1) */ +} +/* { dg-final { scan-assembler "sth\\s+0,\\\[" } } */ +/* { dg-final { scan-assembler "sth\\s+31,\\\[" } } */ +/* { dg-final { scan-assembler "sth\\s+-32,\\\[" } } */ +/* { dg-final { scan-assembler "sth\\s+-1,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-6.c b/gcc/testsuite/gcc.target/arc64/movh-6.c new file mode 100644 index 0000000000000..508236cd6de43 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-6.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movh-1.c" for further details. 
*/ + +/* assign immediate to a memory: this immediate cannot be * + * represented by 6-bit, hence stb w6, mem is not an option */ +short mem; +void foo(void) +{ + mem = 0x40; /* smallest 7-bit number */ + mem = 0x7F; /* largest 7-bit number */ + mem = 0x80; /* 128 */ + mem = -128; /* smallest 8-bit number */ + mem = -33; /* first negative number outside w6's range. else, + it would use something like 'sth -32, [@mem]' */ +} +/* { dg-final { scan-assembler "mov_s\\s+r\\d,64" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,127" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,128" } } */ +/* { dg-final { scan-assembler "mov\\s+r\\d,-128" } } */ +/* { dg-final { scan-assembler "mov\\s+r\\d,-33" } } */ +/* { dg-final { scan-assembler-times "sth\[_s\\s\]+r\\d,\\\[" 5 } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-7.c b/gcc/testsuite/gcc.target/arc64/movh-7.c new file mode 100644 index 0000000000000..8042248dc6bde --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fomit-frame-pointer" } */ + +/* check "movh-1.c" for further details. */ + +/* assign memory to a memory */ +short mem_dst, mem_src; +void foo(void) +{ + mem_dst = mem_src; +} +/* { dg-final { scan-assembler "ldh\\s+r\[0-9\]+,\\\[" } } */ +/* { dg-final { scan-assembler-not "ext\[bhw\]\\s+" } } */ +/* { dg-final { scan-assembler "sth\[_s\\s\]+r\\d,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-8.c b/gcc/testsuite/gcc.target/arc64/movh-8.c new file mode 100644 index 0000000000000..4ce0cc71bbd47 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-8.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fomit-frame-pointer" } */ + +/* check "movh-1.c" for further details. */ + +/* FIXME: with a 'volatile' this test generates an unnecessary sexh */ +/* assign memory to a memory */ +volatile short mem_dst, mem_src; +void foo(void) +{ + mem_dst = mem_src; +} +/* { dg-final { scan-assembler "ldh\\s+r\[0-9\]+,\\\[" } } */ +/* { dg-final { scan-assembler-not "sexh" } } */ +/* { dg-final { scan-assembler "sth\[_s\\s\]+r\\d,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movh-9.c b/gcc/testsuite/gcc.target/arc64/movh-9.c new file mode 100644 index 0000000000000..3e50106307f06 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movh-9.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fomit-frame-pointer" } */ + +/* "movqi" is about assigning 8-bit of data (byte). * + * * + * This is one of the tests in series of testing for * + * "movqi" support. The test scenarios are as the * + * following table: * + * * + * ,--------.----------.----------.-----------------. * + * | test | notation | dest. 
| source | * + * |--------|---------------------------------------| * + * | movq-1 | r <- i | register | immediate | * + * | movq-2 | r <- r | | register | * + * | movq-3 | r <- m | | memory | * + * |--------+----------+----------+-----------------| * + * | movq-4 | m <- r | | register | * + * | movq-5 | m <- i | memroy | immediate small | * + * | movq-6 | m <- I | | immediate big | * + * | movq-7 | m <- m | | memory | * + * `--------^----------^----------^-----------------' */ + +/* FIXME: this test fails trying to assign memory to memory directly */ +/* assign memory to a memory */ +char mem_dst, mem_src; +void foo(void) +{ + mem_dst = mem_src; +} +/* { dg-final { scan-assembler "ldb\\s+r\[0-9\]+,\\\[" } } */ +/* { dg-final { scan-assembler "stb\[_s\\s\]+r\\d,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-1.c b/gcc/testsuite/gcc.target/arc64/movq-1.c new file mode 100644 index 0000000000000..3ea1c665cf9f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* "movqi" is about assigning 8-bit of data (byte). * + * * + * This is one of the tests in series of testing for * + * "movqi" support. The test scenarios are as the * + * following table: * + * * + * ,--------.----------.----------.-----------------. * + * | test | notation | dest. | source | * + * |--------|---------------------------------------| * + * | movq-1 | r <- i | register | immediate | * + * | movq-2 | r <- r | | register | * + * | movq-3 | r <- m | | memory | * + * |--------+----------+----------+-----------------| * + * | movq-4 | m <- r | | register | * + * | movq-5 | m <- i | memroy | immediate small | * + * | movq-6 | m <- I | | immediate big | * + * | movq-7 | m <- m | | memory | * + * | movq-8 | m <- m | volatile causes ext, why? | * + * `--------^----------^----------^-----------------' */ + +/* assign immediate to register */ +volatile char dummy; +void foo(void) +{ + volatile register char dst; + dst = 0x0; + dst = 0x22; + dst = 0x40; + dst = 0x80; + dst = -128; + dst = 0xFF; + dummy = dst; +} +/* { dg-final { scan-assembler "stb\\s+0,\\\[" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,34" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,64" } } */ +/* expecting "mov_s r0,128" twice to cover both dst=-/+128 */ +/* { dg-final { scan-assembler-times "mov\\s+r\\d,-128" 2 } } */ +/* { dg-final { scan-assembler "stb\\s+-1,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-2.c b/gcc/testsuite/gcc.target/arc64/movq-2.c new file mode 100644 index 0000000000000..78515c1f63b9c --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* check "movq-1.c" for further details. */ + +/* assign a register to register */ +char foo(char a, char b) +{ + return b; +} +/* { dg-final { scan-assembler "extb_s\\s+r\\d+,r\\d+" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-3.c b/gcc/testsuite/gcc.target/arc64/movq-3.c new file mode 100644 index 0000000000000..3e9288f72d7f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movq-1.c" for further details. 
*/ + +/* assign memory to register */ +volatile char mem; +void foo(void) +{ + register char dst = mem; +} +/* { dg-final { scan-assembler "ldb\[_s\\s\]+r\[0-9\]+,\\\[" } } */ + diff --git a/gcc/testsuite/gcc.target/arc64/movq-4.c b/gcc/testsuite/gcc.target/arc64/movq-4.c new file mode 100644 index 0000000000000..e1c9b863257e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movq-1.c" for further details. */ + +/* register to memory */ +char mem; +void foo(void) +{ + register char reg_char; + mem = reg_char; +} +/* { dg-final { scan-assembler "stb\[_s\\s\]+r\[0-9\]+,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-5.c b/gcc/testsuite/gcc.target/arc64/movq-5.c new file mode 100644 index 0000000000000..8546af1183233 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movq-1.c" for further details. */ + +/* assign immediate to a memory: this immediate is small * + * enough to be covered by w6 (signed 6 bit number). */ +volatile char mem; +void foo(void) +{ + mem = 0x00; /* the usual suspect: 0 */ + mem = 31; /* largest positive number in w6 */ + mem = -32; /* smallest negative number in w6 */ + mem = -1; /* just a -1 (all bits 1) */ +} +/* { dg-final { scan-assembler "stb\\s+0,\\\[" } } */ +/* { dg-final { scan-assembler "stb\\s+31,\\\[" } } */ +/* { dg-final { scan-assembler "stb\\s+-32,\\\[" } } */ +/* { dg-final { scan-assembler "stb\\s+-1,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-6.c b/gcc/testsuite/gcc.target/arc64/movq-6.c new file mode 100644 index 0000000000000..4f5295f7d2683 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-6.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movq-1.c" for further details. */ + +/* assign immediate to a memory: this immediate cannot be * + * represented by 6-bit, hence stb w6, mem is not an option */ +char mem; +void foo(void) +{ + mem = 0x40; /* smallest 7-bit number */ + mem = 0x7F; /* largest 7-bit number */ + mem = 0x80; /* 128 */ + mem = -128; /* smallest 8-bit number */ + mem = -33; /* first negative number outside w6's range. else, + it would use something like 'stb -32, [@mem]' */ +} +/* { dg-final { scan-assembler "mov_s\\s+r\\d,64" } } */ +/* { dg-final { scan-assembler "mov_s\\s+r\\d,127" } } */ +/* { dg-final { scan-assembler-times "mov\\s+r\\d,-128" 2 } } */ +/* { dg-final { scan-assembler "mov\\s+r\\d,-33" } } */ +/* { dg-final { scan-assembler-times "stb\[_s\\s\]+r\\d,\\\[" 5 } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-7.c b/gcc/testsuite/gcc.target/arc64/movq-7.c new file mode 100644 index 0000000000000..48e2b3d88c586 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-7.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fomit-frame-pointer" } */ + +/* check "movq-1.c" for further details. 
*/ + +/* assign memory to a memory */ +char mem_dst, mem_src; +void foo(void) +{ + mem_dst = mem_src; +} +/* { dg-final { scan-assembler "ldb\\s+r\[0-9\]+,\\\[" } } */ +/* { dg-final { scan-assembler-not "ext\[bhw\]\\s+" } } */ +/* { dg-final { scan-assembler "stb\[_s\\s\]+r\\d,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/movq-8.c b/gcc/testsuite/gcc.target/arc64/movq-8.c new file mode 100644 index 0000000000000..cd27779c36679 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/movq-8.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fomit-frame-pointer" } */ + +/* check "movq-1.c" for further details. */ + +/* FIXME: with a 'volatile' this test generates an unnecessary extb */ +/* assign memory to a memory */ +volatile char mem_dst, mem_src; +void foo(void) +{ + mem_dst = mem_src; +} +/* { dg-final { scan-assembler "ldb\[_s\\s\]+r\[0-9\]+,\\\[" } } */ +/* { dg-final { scan-assembler "stb\[_s\\s\]+r\\d,\\\[" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/mpyf.c b/gcc/testsuite/gcc.target/arc64/mpyf.c new file mode 100644 index 0000000000000..6c567a92ba238 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/mpyf.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* "Checking if the result is zero" can safely use mpy.f. The pattern of + interest is: + + mpy.f 0,r0,r1 + mov.ne r0,12345678 */ +/* { dg-final { scan-assembler "mpy.f\\s+.*,r0,r1\n\\s*mov\.ne\\s+r0,12345678" } } */ +int check_z_flag (int a, int b) +{ + if (a * b == 0) + return a; + else + return 12345678; +} + +/* "Checking if the result is negative" should not rely on .f notion of mpy, + because in that case N will be only set if bit 63 is set. The pattern + of interest is something like: + + mpy_s r1,r1,r0 + tst_s r1,r1 + mov.n r0,87654321 */ +/* { dg-final { scan-assembler "mpy(_s)\\s+(\[^,\]*).*\n\\s*tst(_s)\\s+\\2,\\2\n\\s*mov\.n\\s+r0,87654321" } } */ +int check_n_flag (int a, int b) +{ + if (a * b >= 0) + return a; + else + return 87654321; +} + diff --git a/gcc/testsuite/gcc.target/arc64/naked-1.c b/gcc/testsuite/gcc.target/arc64/naked-1.c new file mode 100644 index 0000000000000..16771f2a6d268 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/naked-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ +/* Check that naked functions don't place arguments on the stack at + optimisation level '-O0'. */ +extern void bar (int); + +void __attribute__((naked)) +foo (int n, int m) +{ + bar (n + m); +} +/* { dg-final { scan-assembler "\tbl\t@bar" } } */ + +/* Look for things that would appear in a non-naked function, but which + should not appear in a naked function. */ +/* { dg-final { scan-assembler-not "\tj.*\\\[blink\\\]" } } */ +/* { dg-final { scan-assembler-not "\tst.* " } } */ +/* { dg-final { scan-assembler-not "\tmov.*fp,sp" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/naked-2.c b/gcc/testsuite/gcc.target/arc64/naked-2.c new file mode 100644 index 0000000000000..7bd0bcae9126a --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/naked-2.c @@ -0,0 +1,5 @@ +/* Verify proper errors are generated for invalid code. 
*/ +void __attribute__ ((interrupt, naked)) +foo (void) +{ /* { dg-error "are mutually exclusive" } */ +} diff --git a/gcc/testsuite/gcc.target/arc64/scaling-128.c b/gcc/testsuite/gcc.target/arc64/scaling-128.c new file mode 100644 index 0000000000000..43a701112059c --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/scaling-128.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mno-fpmov" } */ +/* { dg-additional-options "-m128" { target { hs6x } } } */ +/* { dg-additional-options "-mll64" { target { hs5x } } } */ + +/* Address scaling for double loads and stores must multiply + the "offset" with the size of a single element and not + the double of it. For example: + + Wrong code generation: + ldd r2,[r1] + std r2,[r0] + lddl.as r2,[r1,1] + stdl.as r2,[r0,1] + ... + + Correct code generation: + lddl r2,[r1] + stdl r2,[r0] + lddl.as r2,[r1,2] + stdl.as r2,[r0,2] + ... +*/ + +/* Must generate: + lddl r0,[r3] + stdl r0,[r2] + lddl.as r0,[r3,2] + stdl.as r0,[r2,2] +*/ +void func() +{ + char buf[32]; + __builtin_strcpy(buf, "ABCDEFGHIJKLMNOPQRSTUVWXYZ23456"); +} + +/* { dg-final { scan-assembler "lddl\\s+r.,\\\[r.\\\]" { target { hs6x } } } } */ +/* { dg-final { scan-assembler "stdl\\s+r.,\\\[r.\\\]" { target { hs6x } } } } */ +/* { dg-final { scan-assembler "lddl.as\\s+r.,\\\[r.,2\\\]" { target { hs6x } } } } */ +/* { dg-final { scan-assembler "stdl.as\\s+r.,\\\[r.,2\\\]" { target { hs6x } } } } */ + +/* { dg-final { scan-assembler "ldd\\s+r.,\\\[r.\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "std\\s+r.,\\\[r.\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "ldd.as\\s+r.,\\\[r.,2\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "std.as\\s+r.,\\\[r.,2\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "ldd.as\\s+r.,\\\[r.,4\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "std.as\\s+r.,\\\[r.,4\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "ldd.as\\s+r.,\\\[r.,6\\\]" { target { hs5x } } } } */ +/* { dg-final { scan-assembler "std.as\\s+r.,\\\[r.,6\\\]" { target { hs5x } } } } */ diff --git a/gcc/testsuite/gcc.target/arc64/scaling-64.c b/gcc/testsuite/gcc.target/arc64/scaling-64.c new file mode 100644 index 0000000000000..4474bee50b913 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/scaling-64.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hs5x } */ +/* { dg-options "-mll64 -O0 -mno-fpmov" } */ + +/* Address scaling for double loads and stores must multiply + the "offset" with the size of a single element and not + the double of it. For example: + + Wrong code generation: + ldd r2,[r1] + std r2,[r0] + ldd.as r2,[r1,1] + std.as r2,[r0,1] + ... + + Correct code generation: + ldd r2,[r1] + std r2,[r0] + ldd.as r2,[r1,2] + std.as r2,[r0,2] + ... 
+*/ + +/* Must generate: + + ldd r2,[r1] + std r2,[r0] + ldd.as r2,[r1,2] + std.as r2,[r0,2] + ldd.as r2,[r1,4] + std.as r2,[r0,4] + ldd.as r2,[r1,6] + std.as r2,[r0,6] +*/ +void func() +{ + char buf[32]; + __builtin_strcpy(buf, "ABCDEFGHIJKLMNOPQRSTUVWXYZ23456"); +} + +/* { dg-final { scan-assembler "ldd\\s+r.,\\\[r.\\\]" } } */ +/* { dg-final { scan-assembler "std\\s+r.,\\\[r.\\\]" } } */ +/* { dg-final { scan-assembler "ldd.as\\s+r.,\\\[r.,2\\\]" } } */ +/* { dg-final { scan-assembler "std.as\\s+r.,\\\[r.,2\\\]" } } */ +/* { dg-final { scan-assembler "ldd.as\\s+r.,\\\[r.,4\\\]" } } */ +/* { dg-final { scan-assembler "std.as\\s+r.,\\\[r.,4\\\]" } } */ +/* { dg-final { scan-assembler "ldd.as\\s+r.,\\\[r.,6\\\]" } } */ +/* { dg-final { scan-assembler "std.as\\s+r.,\\\[r.,6\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/split128b.c b/gcc/testsuite/gcc.target/arc64/split128b.c new file mode 100644 index 0000000000000..9a7e8f977da11 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/split128b.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hs6x } */ +/* { dg-options "-O2 -m128 -fpic" } */ + +struct tst { + long a; + long b; +} static var; + +void bar (long, struct tst); + +void foo (void) +{ + bar (0, var); +} + +/* { dg-final { scan-assembler "ldl\\s+r1,\\\[pcl,@var@pcl\\\]" } } */ +/* { dg-final { scan-assembler "ldl\\s+r2,\\\[pcl,@var@pcl\\\+8\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/symbol-1.c b/gcc/testsuite/gcc.target/arc64/symbol-1.c new file mode 100644 index 0000000000000..f1feea02289a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/symbol-1.c @@ -0,0 +1,14 @@ +/* { dg-options "-O2 -fPIC" } */ + +struct { + int a; + int b; +} _rtld_local; + +extern void bar (int *); +void foo (void) +{ + bar(&_rtld_local.b); +} + +/* { dg-final { scan-assembler "_rtld_local@gotpc" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/symbol-2.c b/gcc/testsuite/gcc.target/arc64/symbol-2.c new file mode 100644 index 0000000000000..6179e388aaf8a --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/symbol-2.c @@ -0,0 +1,23 @@ +/* Test whether a symbol ends up to be accessed directly. */ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -fpic -w" } */ +/* { dg-final { scan-assembler-times "@_nl_C_locobj" 1 } } */ +/* { dg-final { scan-assembler-times "@_nl_C_locobj@gotpc" 1 } } */ +struct { + int a; +} _nl_C_locobj; + +int b; +c() +{ + char *d; + for (; d[0];) + if (b) + if (c < '9') + { + char e = ({ (&_nl_C_locobj)->a; }); + if (e == 'i' && f()) + f(0, 0, 0, &_nl_C_locobj); + e == 'n' && f(0, 0, 0, _nl_C_locobj); + } +} diff --git a/gcc/testsuite/gcc.target/arc64/tswap.c b/gcc/testsuite/gcc.target/arc64/tswap.c new file mode 100644 index 0000000000000..5800e7cf9875d --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/tswap.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target hs6x } */ +/* { dg-options "-O2 -msimd -ftree-vectorize" } */ + +/* Enable this test when HS5x recognizes various vector permutations + operations. 
*/ + +struct{ + unsigned short x1; + unsigned short x2; +} vara, varb; + +void foo (void) +{ + vara.x1 = varb.x2; + vara.x2 = varb.x1; +} +/* { dg-final { scan-assembler "swap\\s+r\\d+,r\\d+" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/vshuf-4.inc b/gcc/testsuite/gcc.target/arc64/vshuf-4.inc new file mode 100644 index 0000000000000..309671ace09f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/vshuf-4.inc @@ -0,0 +1,154 @@ +#define N 4 +#define TESTS \ +T (0, 0, 1, 2, 3) \ +T (1, 0, 0, 0, 0) \ +T (2, 6, 3, 1, 5) \ +T (3, 1, 3, 5, 0) \ +T (4, 6, 3, 5, 4) \ +T (5, 6, 4, 1, 0) \ +T (6, 6, 1, 4, 2) \ +T (7, 3, 7, 4, 4) \ +T (8, 3, 2, 4, 4) \ +T (9, 3, 2, 6, 1) \ +T (10, 5, 4, 5, 6) \ +T (11, 1, 4, 0, 7) \ +T (12, 1, 5, 7, 2) \ +T (13, 2, 3, 0, 4) \ +T (14, 7, 6, 4, 2) \ +T (15, 6, 1, 3, 4) \ +T (16, 0, 2, 4, 6) \ +T (17, 1, 3, 5, 7) \ +T (18, 3, 3, 3, 3) \ +T (19, 3, 2, 1, 0) \ +T (20, 0, 4, 1, 5) \ +T (21, 2, 6, 3, 7) \ +T (22, 1, 2, 3, 0) \ +T (23, 2, 1, 0, 3) \ +T (24, 2, 5, 6, 3) \ +T (25, 0, 1, 4, 5) +#define EXPTESTS \ +T (116, 1, 2, 4, 3) \ +T (117, 7, 3, 3, 0) \ +T (118, 5, 3, 2, 7) \ +T (119, 0, 3, 5, 6) \ +T (120, 0, 0, 1, 5) \ +T (121, 4, 6, 2, 1) \ +T (123, 4, 6, 3, 2) \ +T (124, 4, 7, 5, 6) \ +T (125, 0, 4, 2, 4) \ +T (126, 2, 4, 6, 0) \ +T (127, 4, 3, 5, 1) \ +T (128, 5, 4, 7, 3) \ +T (129, 7, 5, 6, 4) \ +T (130, 2, 0, 5, 3) \ +T (131, 7, 4, 3, 0) \ +T (132, 6, 1, 3, 1) \ +T (133, 3, 4, 1, 7) \ +T (134, 0, 0, 6, 4) \ +T (135, 6, 4, 0, 1) \ +T (136, 6, 0, 2, 4) \ +T (137, 1, 3, 4, 2) \ +T (138, 3, 1, 2, 4) \ +T (139, 3, 1, 5, 0) \ +T (140, 1, 6, 0, 2) \ +T (141, 0, 2, 6, 4) \ +T (142, 1, 7, 5, 2) \ +T (143, 7, 0, 4, 1) \ +T (144, 7, 3, 5, 6) \ +T (145, 0, 7, 5, 4) \ +T (146, 6, 4, 2, 3) \ +T (147, 1, 5, 7, 6) \ +T (148, 5, 7, 4, 7) \ +T (149, 5, 2, 2, 0) \ +T (150, 7, 1, 6, 4) \ +T (151, 5, 2, 4, 6) \ +T (152, 5, 0, 4, 6) \ +T (153, 4, 2, 7, 3) \ +T (154, 7, 1, 0, 6) \ +T (155, 0, 4, 2, 5) \ +T (156, 3, 4, 3, 2) \ +T (157, 2, 0, 6, 1) \ +T (158, 5, 1, 7, 4) \ +T (159, 2, 1, 5, 6) \ +T (160, 1, 6, 5, 7) \ +T (161, 2, 4, 1, 6) \ +T (162, 3, 7, 1, 6) \ +T (163, 2, 1, 4, 7) \ +T (164, 4, 2, 1, 0) \ +T (165, 0, 7, 1, 3) \ +T (166, 7, 4, 2, 3) \ +T (167, 4, 5, 3, 5) \ +T (168, 1, 5, 6, 7) \ +T (169, 6, 3, 2, 0) \ +T (170, 6, 2, 1, 5) \ +T (171, 5, 6, 1, 3) \ +T (172, 2, 2, 3, 1) \ +T (173, 5, 4, 3, 5) \ +T (174, 7, 3, 4, 1) \ +T (175, 4, 2, 3, 6) \ +T (176, 7, 6, 5, 3) \ +T (177, 7, 2, 0, 6) \ +T (178, 1, 3, 0, 2) \ +T (179, 5, 3, 0, 5) \ +T (180, 4, 6, 7, 2) \ +T (181, 4, 5, 2, 0) \ +T (182, 5, 0, 1, 2) \ +T (183, 2, 3, 4, 1) \ +T (184, 2, 6, 5, 1) \ +T (185, 0, 6, 7, 4) \ +T (186, 4, 1, 6, 2) \ +T (187, 1, 3, 2, 3) \ +T (188, 2, 5, 4, 3) \ +T (189, 2, 5, 6, 4) \ +T (190, 4, 0, 5, 0) \ +T (191, 2, 1, 6, 0) \ +T (192, 7, 5, 0, 1) \ +T (193, 3, 5, 6, 7) \ +T (194, 0, 1, 2, 7) \ +T (195, 3, 1, 0, 2) \ +T (196, 2, 4, 6, 3) \ +T (197, 6, 0, 5, 4) \ +T (198, 6, 5, 7, 1) \ +T (199, 2, 5, 4, 6) \ +T (200, 7, 2, 3, 6) \ +T (201, 3, 5, 7, 3) \ +T (202, 1, 7, 4, 6) \ +T (203, 4, 0, 7, 1) \ +T (204, 7, 1, 0, 4) \ +T (205, 5, 1, 3, 4) \ +T (206, 0, 7, 3, 5) \ +T (207, 3, 2, 1, 5) \ +T (208, 7, 5, 0, 2) \ +T (209, 7, 0, 6, 3) \ +T (210, 6, 6, 7, 7) \ +T (211, 5, 6, 0, 4) \ +T (212, 5, 1, 2, 2) \ +T (213, 7, 1, 2, 6) \ +T (214, 5, 4, 2, 6) \ +T (215, 1, 5, 6, 4) \ +T (216, 7, 0, 2, 1) \ +T (217, 1, 5, 3, 6) \ +T (218, 3, 3, 6, 5) \ +T (219, 2, 3, 5, 7) \ +T (220, 2, 4, 3, 0) \ +T (221, 1, 5, 6, 3) \ +T (222, 7, 5, 1, 5) \ +T (223, 0, 5, 3, 4) \ +T (224, 2, 3, 1, 4) \ +T (225, 2, 3, 5, 1) \ 
+T (226, 4, 3, 1, 0) \ +T (227, 2, 3, 5, 5) \ +T (228, 1, 1, 1, 1) \ +T (229, 2, 2, 2, 2) \ +T (230, 3, 3, 3, 3) \ +T (231, 0, 2, 0, 2) \ +T (232, 0, 2, 4, 6) \ +T (233, 1, 3, 1, 3) \ +T (234, 1, 3, 5, 7) \ +T (235, 4, 6, 0, 2) \ +T (236, 5, 7, 1, 3) \ +T (237, 1, 0, 3, 2) \ +T (238, 0, 1, 0, 1) \ +T (239, 2, 3, 2, 3) \ +T (240, 2, 3, 6, 7) \ + diff --git a/gcc/testsuite/gcc.target/arc64/vshuf-8.inc b/gcc/testsuite/gcc.target/arc64/vshuf-8.inc new file mode 100644 index 0000000000000..31b24d40e6ab3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/vshuf-8.inc @@ -0,0 +1,158 @@ +#define N 8 +#define TESTS \ +T (0, 0, 1, 2, 3, 4, 5, 6, 7) \ +T (1, 0, 0, 0, 0, 0, 0, 0, 0) \ +T (2, 0, 1, 2, 3, 0, 1, 2, 3) \ +T (3, 0, 10, 3, 8, 1, 11, 10, 2) \ +T (4, 0, 1, 2, 3, 3, 0, 2, 1) \ +T (5, 12, 5, 14, 9, 8, 15, 1, 7) \ +T (6, 9, 10, 11, 8, 4, 15, 14, 12) \ +T (7, 4, 10, 14, 9, 11, 1, 12, 11) \ +T (8, 15, 3, 3, 8, 5, 11, 2, 10) \ +T (9, 13, 11, 10, 15, 8, 5, 14, 8) \ +T (10, 9, 13, 12, 14, 10, 1, 5, 14) \ +T (11, 5, 11, 12, 6, 3, 2, 4, 15) \ +T (12, 5, 13, 14, 8, 4, 10, 4, 12) \ +T (13, 14, 8, 12, 3, 13, 9, 5, 4) \ +T (14, 15, 3, 13, 6, 14, 12, 10, 0) \ +T (15, 0, 5, 11, 7, 4, 6, 14, 1) \ +T (16, 0, 2, 4, 6, 8, 10, 12, 14) \ +T (17, 1, 3, 5, 7, 9, 11, 13, 15) \ +T (18, 3, 3, 3, 3, 3, 3, 3, 3) \ +T (19, 7, 6, 5, 4, 3, 2, 1, 0) \ +T (20, 0, 8, 1, 9, 2, 10, 3, 11) \ +T (21, 4, 12, 5, 13, 6, 14, 7, 15) \ +T (22, 1, 2, 3, 4, 5, 6, 7, 0) \ +T (23, 6, 5, 4, 3, 2, 1, 0, 7) \ +T (24, 0, 1, 2, 3, 8, 9, 10, 11) \ +T (25, 0, 1, 2, 3, 12, 13, 14, 15) \ +T (26, 0, 1, 8, 9, 10, 11, 12, 13) \ +T (27, 0, 8, 9, 10, 11, 12, 13, 14) +#define EXPTESTS \ +T (116, 9, 3, 9, 4, 7, 0, 0, 6) \ +T (117, 4, 14, 12, 8, 9, 6, 0, 10) \ +T (118, 10, 12, 1, 3, 4, 11, 9, 2) \ +T (119, 4, 11, 9, 5, 8, 14, 0, 2) \ +T (120, 0, 10, 8, 6, 4, 9, 7, 5) \ +T (121, 10, 15, 0, 4, 12, 9, 7, 3) \ +T (122, 13, 6, 0, 7, 5, 12, 12, 2) \ +T (123, 4, 10, 11, 15, 12, 7, 3, 8) \ +T (124, 1, 13, 15, 9, 6, 5, 7, 4) \ +T (125, 7, 13, 2, 5, 2, 8, 1, 6) \ +T (126, 4, 5, 2, 12, 14, 13, 8, 6) \ +T (127, 4, 10, 2, 7, 11, 15, 9, 0) \ +T (128, 14, 8, 12, 10, 13, 3, 11, 2) \ +T (129, 10, 8, 14, 9, 5, 1, 15, 7) \ +T (130, 12, 2, 9, 13, 5, 14, 1, 15) \ +T (131, 10, 4, 1, 14, 11, 15, 9, 2) \ +T (132, 9, 12, 7, 4, 2, 1, 0, 9) \ +T (133, 11, 15, 4, 10, 3, 12, 13, 5) \ +T (134, 1, 5, 6, 13, 14, 15, 3, 10) \ +T (135, 0, 14, 6, 10, 1, 13, 3, 2) \ +T (136, 2, 11, 1, 5, 12, 13, 3, 5) \ +T (137, 7, 11, 4, 1, 12, 10, 14, 10) \ +T (138, 3, 6, 7, 13, 11, 15, 10, 0) \ +T (139, 10, 8, 7, 14, 1, 11, 0, 13) \ +T (140, 5, 3, 13, 11, 8, 5, 12, 6) \ +T (141, 15, 2, 12, 13, 10, 8, 5, 0) \ +T (142, 13, 9, 10, 2, 11, 3, 4, 1) \ +T (143, 2, 14, 6, 5, 1, 8, 15, 0) \ +T (144, 12, 9, 14, 10, 1, 3, 11, 13) \ +T (145, 12, 13, 11, 2, 5, 6, 4, 8) \ +T (146, 5, 0, 2, 1, 6, 8, 15, 13) \ +T (147, 8, 7, 1, 3, 5, 11, 14, 15) \ +T (148, 0, 9, 2, 15, 3, 1, 8, 10) \ +T (149, 4, 14, 11, 6, 8, 5, 12, 7) \ +T (150, 5, 9, 10, 12, 14, 15, 2, 7) \ +T (151, 11, 6, 5, 7, 11, 14, 2, 1) \ +T (152, 13, 1, 7, 4, 6, 8, 15, 9) \ +T (153, 10, 12, 9, 1, 6, 7, 8, 15) \ +T (154, 8, 4, 5, 1, 3, 0, 7, 13) \ +T (155, 13, 9, 3, 4, 10, 1, 15, 7) \ +T (156, 13, 8, 2, 7, 0, 6, 3, 6) \ +T (157, 15, 15, 13, 6, 0, 5, 14, 4) \ +T (158, 13, 1, 2, 2, 7, 9, 2, 6) \ +T (159, 5, 12, 10, 13, 6, 1, 4, 7) \ +T (160, 0, 2, 9, 1, 5, 11, 14, 11) \ +T (161, 14, 6, 5, 10, 3, 2, 15, 4) \ +T (162, 3, 10, 0, 1, 13, 14, 11, 15) \ +T (163, 13, 7, 5, 9, 5, 0, 11, 4) \ +T (164, 2, 11, 1, 12, 3, 13, 4, 9) \ +T (165, 1, 0, 10, 11, 5, 13, 4, 3) \ +T (166, 
3, 9, 1, 12, 15, 14, 10, 5) \ +T (167, 3, 10, 11, 14, 5, 1, 8, 12) \ +T (168, 10, 15, 5, 14, 4, 13, 6, 3) \ +T (169, 1, 8, 6, 4, 11, 13, 7, 10) \ +T (170, 8, 7, 1, 15, 11, 9, 0, 3) \ +T (171, 4, 0, 11, 7, 1, 15, 3, 13) \ +T (172, 14, 7, 3, 4, 9, 11, 0, 6) \ +T (173, 7, 3, 11, 4, 8, 2, 10, 15) \ +T (174, 7, 9, 14, 2, 0, 5, 13, 3) \ +T (175, 4, 8, 5, 9, 3, 11, 1, 14) \ +T (176, 13, 12, 3, 3, 1, 4, 8, 5) \ +T (177, 7, 12, 9, 13, 10, 4, 5, 8) \ +T (178, 14, 3, 12, 7, 2, 6, 5, 5) \ +T (179, 7, 0, 8, 6, 2, 14, 12, 15) \ +T (180, 2, 12, 0, 4, 1, 15, 11, 10) \ +T (181, 0, 7, 5, 12, 15, 10, 14, 3) \ +T (182, 3, 10, 2, 5, 11, 6, 13, 14) \ +T (183, 3, 2, 7, 11, 0, 13, 8, 10) \ +T (184, 0, 12, 15, 1, 9, 2, 11, 4) \ +T (185, 11, 15, 8, 10, 9, 1, 13, 3) \ +T (186, 12, 13, 15, 12, 3, 9, 5, 7) \ +T (187, 4, 10, 5, 6, 1, 11, 0, 11) \ +T (188, 11, 6, 7, 9, 0, 8, 14, 8) \ +T (189, 1, 15, 6, 9, 12, 6, 7, 14) \ +T (190, 1, 5, 6, 11, 12, 13, 3, 0) \ +T (191, 0, 8, 15, 13, 12, 6, 1, 4) \ +T (192, 12, 15, 8, 4, 2, 0, 9, 5) \ +T (193, 14, 5, 13, 10, 12, 11, 0, 1) \ +T (194, 12, 1, 9, 8, 10, 9, 0, 2) \ +T (195, 11, 0, 13, 4, 6, 2, 14, 15) \ +T (196, 0, 10, 6, 2, 12, 4, 9, 13) \ +T (197, 7, 12, 8, 10, 1, 0, 5, 0) \ +T (198, 12, 13, 0, 5, 3, 14, 11, 4) \ +T (199, 9, 1, 4, 14, 10, 12, 15, 6) \ +T (200, 3, 12, 13, 6, 14, 2, 1, 6) \ +T (201, 5, 14, 8, 10, 1, 12, 2, 0) \ +T (202, 5, 8, 2, 7, 4, 15, 14, 2) \ +T (203, 14, 13, 10, 9, 11, 15, 7, 8) \ +T (204, 12, 13, 14, 2, 4, 9, 5, 7) \ +T (205, 0, 7, 5, 4, 7, 13, 6, 8) \ +T (206, 7, 0, 15, 6, 12, 2, 5, 4) \ +T (207, 8, 6, 0, 1, 1, 11, 1, 9) \ +T (208, 11, 6, 14, 9, 5, 3, 7, 13) \ +T (209, 12, 3, 15, 9, 1, 0, 8, 13) \ +T (210, 11, 1, 9, 8, 7, 6, 12, 2) \ +T (211, 10, 9, 2, 6, 8, 11, 0, 4) \ +T (212, 10, 13, 15, 9, 6, 15, 14, 10) \ +T (213, 9, 5, 8, 3, 4, 7, 11, 4) \ +T (214, 1, 2, 13, 5, 8, 4, 3, 6) \ +T (215, 8, 3, 2, 4, 9, 14, 12, 13) \ +T (216, 5, 7, 8, 15, 3, 1, 10, 4) \ +T (217, 2, 9, 3, 2, 14, 11, 5, 7) \ +T (218, 15, 6, 4, 10, 14, 3, 13, 2) \ +T (219, 0, 8, 14, 5, 15, 7, 10, 1) \ +T (220, 14, 0, 6, 10, 8, 2, 7, 4) \ +T (221, 15, 13, 3, 14, 11, 2, 14, 6) \ +T (222, 8, 2, 10, 13, 1, 0, 4, 11) \ +T (223, 7, 15, 2, 9, 1, 12, 11, 3) \ +T (224, 13, 15, 3, 12, 15, 7, 0, 8) \ +T (225, 0, 2, 1, 11, 14, 3, 9, 14) \ +T (226, 12, 14, 3, 15, 8, 5, 1, 7) \ +T (227, 0, 5, 13, 8, 4, 2, 1, 3) \ +T (228, 0, 2, 4, 6, 0, 2, 4, 6) \ +T (229, 0, 1, 4, 5, 0, 1, 4, 5) \ +T (230, 0, 1, 2, 3, 0, 1, 2, 3) \ +T (231, 1, 3, 5, 7, 1, 3, 5, 7) \ +T (232, 2, 3, 6, 7, 2, 3, 6, 7) \ +T (233, 4, 5, 6, 7, 4, 5, 6, 7) \ +T (234, 1, 0, 3, 2, 5, 4, 7, 6) \ +T (235, 2, 3, 0, 1, 6, 7, 4, 5) \ +T (236, 4, 5, 6, 7, 0, 1, 2, 3) \ +T (237, 0, 1, 0, 1, 2, 3, 2, 3) \ +T (238, 4, 5, 4, 5, 6, 7, 6, 7) \ +T (239, 0, 0, 2, 2, 4, 4, 6, 6) \ +T (240, 0, 1, 0, 1, 4, 5, 4, 5) \ +T (241, 1, 1, 3, 3, 5, 5, 7, 7) \ +T (242, 2, 3, 2, 3, 6, 7, 6, 7) diff --git a/gcc/testsuite/gcc.target/arc64/vshuf-main.inc b/gcc/testsuite/gcc.target/arc64/vshuf-main.inc new file mode 100644 index 0000000000000..52b75ee7365b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/vshuf-main.inc @@ -0,0 +1,55 @@ +/* Driver fragment for __builtin_shuffle of any vector shape. */ + +extern void abort (void); + +#ifndef UNSUPPORTED +V a, b, c, d; + +#define T(num, msk...) 
\ +__attribute__((noinline, noclone)) void \ +test_##num (void) \ +{ \ + VI mask = { msk }; \ + int i; \ + c = __builtin_shuffle (a, mask); \ + d = __builtin_shuffle (a, b, mask); \ + __asm ("" : : "r" (&c), "r" (&d) : "memory"); \ + for (i = 0; i < N; ++i) \ + if (c[i] != a[mask[i] & (N - 1)]) \ + abort (); \ + else if ((mask[i] & N)) \ + { \ + if (d[i] != b[mask[i] & (N - 1)]) \ + abort (); \ + } \ + else if (d[i] != a[mask[i] & (N - 1)]) \ + abort (); \ +} +TESTS +#ifdef EXPENSIVE +EXPTESTS +#endif +#endif + +int +main () +{ +#ifndef UNSUPPORTED + int i; + for (i = 0; i < N; ++i) + { + a[i] = i + 2; + b[i] = N + i + 2; + } + +#undef T +#define T(num, msk...) \ + test_##num (); + TESTS +#ifdef EXPENSIVE + EXPTESTS +#endif +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/arc64/vshuf-v2hi.c b/gcc/testsuite/gcc.target/arc64/vshuf-v2hi.c new file mode 100644 index 0000000000000..19a32ca30e4e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/vshuf-v2hi.c @@ -0,0 +1,17 @@ +/* { dg-options "-msimd -O2" } */ + +typedef short V __attribute__((vector_size(4))); +typedef unsigned short VI __attribute__((vector_size(4))); + +V a, b, c; +const VI maskl = {0, 0}; +const VI maskh = {1, 1}; + +void foo (void) +{ + b = __builtin_shuffle (a, maskl); + c = __builtin_shuffle (a, maskh); +} + +/* { dg-final { scan-assembler "vpack2hl" } } */ +/* { dg-final { scan-assembler "vpack2hm" } } */ diff --git a/gcc/testsuite/gcc.target/arc64/vshuf-v4hf.c b/gcc/testsuite/gcc.target/arc64/vshuf-v4hf.c new file mode 100644 index 0000000000000..d32324a02ba91 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/vshuf-v4hf.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-require-effective-target dpfp } */ +/* { dg-options "-DEXPENSIVE -O2" } */ + +typedef _Float16 V __attribute__((vector_size(8))); +typedef unsigned short VI __attribute__((vector_size(8))); + +#include "vshuf-4.inc" +#include "vshuf-main.inc" diff --git a/gcc/testsuite/gcc.target/arc64/vshuf-v8hf.c b/gcc/testsuite/gcc.target/arc64/vshuf-v8hf.c new file mode 100644 index 0000000000000..b0bf4f01983fd --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/vshuf-v8hf.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-require-effective-target dpfp } */ +/* { dg-options "-DEXPENSIVE -O2" } */ + +typedef _Float16 V __attribute__((vector_size(16))); +typedef unsigned short VI __attribute__((vector_size(16))); + +#include "vshuf-8.inc" +#include "vshuf-main.inc" diff --git a/gcc/testsuite/gcc.target/arc64/widening_mult.c b/gcc/testsuite/gcc.target/arc64/widening_mult.c new file mode 100644 index 0000000000000..a568fcc36b6b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc64/widening_mult.c @@ -0,0 +1,16 @@ +/* { dg-options "-O2 -msimd -ftree-vectorize" } */ + +void +foo (int N, int* c, short* a, short val) +{ + int i,j; + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + c[i * N + j]=(int)a[i * N + j] * (int)val; + } + } +} + +/* { dg-final { scan-assembler-times "vmpy2h" 2 } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 244fe2306f457..b8b766079af4b 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -775,6 +775,10 @@ proc check_effective_target_freestanding { } { # Check to see that file I/O functions are available. 
proc check_effective_target_fileio { } { + if { [istarget arc64-*-*] } { + return 0 + } + return [check_no_compiler_messages fileio_available executable { #include int main() { @@ -3902,6 +3906,15 @@ proc check_effective_target_arc_atomic { } { }] } +# Return 1 if this is a compiler supporting ARC atomic operations +proc check_effective_target_arc64_atomic { } { + return [check_no_compiler_messages arc_atomic assembly { + #if !defined(__ARC64_ATOMIC__) + #error NO_ATOMIC64 + #endif + }] +} + # Return 1 if this is an arm target using 32-bit instructions proc check_effective_target_arm32 { } { if { ![istarget arm*-*-*] } { @@ -8510,6 +8523,7 @@ proc check_effective_target_sync_int_128_runtime { } { proc check_effective_target_sync_long_long { } { if { [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget aarch64*-*-*] + || ([istarget arc64-*-*] && [check_effective_target_arc64_atomic]) || [istarget arm*-*-*] || [istarget alpha*-*-*] || ([istarget sparc*-*-*] && [check_effective_target_lp64]) @@ -8574,6 +8588,7 @@ proc check_effective_target_sync_long_long_runtime { } { } "" }]) || [istarget aarch64*-*-*] + || ([istarget arc64-*-*] && [check_effective_target_arc64_atomic]) || [istarget arm*-*-uclinuxfdpiceabi] || ([istarget arm*-*-linux-*] && [check_runtime sync_longlong_runtime { @@ -8643,6 +8658,7 @@ proc check_effective_target_sync_int_long { } { || [istarget powerpc*-*-*] || [istarget cris-*-*] || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) + || ([istarget arc64-*-*] && [check_effective_target_arc64_atomic]) || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) || [check_effective_target_mips_llsc] || [istarget nvptx*-*-*] @@ -8678,7 +8694,9 @@ proc check_effective_target_sync_char_short { } { || [istarget powerpc*-*-*] || [istarget cris-*-*] || ([istarget sparc*-*-*] && [check_effective_target_sparc_v9]) - || ([istarget arc*-*-*] && [check_effective_target_arc_atomic]) + || ([istarget arc64-*-*] && [check_effective_target_arc64_atomic]) + || ([istarget arc-*-*] && [check_effective_target_arc_atomic]) + || ([istarget arceb-*-*] && [check_effective_target_arc_atomic]) || [istarget loongarch*-*-*] || [check_effective_target_mips_llsc] }}] } diff --git a/libgcc/config.host b/libgcc/config.host index 15c64989c7cf8..79739a7bd0e81 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -95,9 +95,12 @@ amdgcn*-*-*) cpu_type=gcn tmake_file="${tmake_file} t-softfp-sfdf t-softfp" ;; -arc*-*-*) +arc-*-* | arceb-*-*) cpu_type=arc ;; +arc[6432]*-*-*) + cpu_type=arc64 + ;; arm*-*-*) cpu_type=arm ;; @@ -445,17 +448,30 @@ amdgcn*-*-amdhsa) tmake_file="$tmake_file gcn/t-amdgcn" extra_parts="crt0.o" ;; -arc*-*-elf*) +arc-*-elf* | arceb-*-elf*) tmake_file="arc/t-arc" extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o" extra_parts="$extra_parts crttls.o" ;; -arc*-*-linux*) +arc-*-linux* | arceb-*-linux*) tmake_file="${tmake_file} t-slibgcc-libgcc t-slibgcc-nolc-override arc/t-arc-uClibc arc/t-arc" extra_parts="$extra_parts crti.o crtn.o" extra_parts="$extra_parts crttls.o" md_unwind_header=arc/linux-unwind.h ;; +arc32-*-elf | arc64-*-elf) + tmake_file="${tmake_file} arc64/t-arc64 arc64/t-softfp t-softfp t-softfp-sfdf" + ;; +arc32-*-linux*) + tmake_file="${tmake_file} arc64/t-softfp t-softfp t-softfp-sfdf" + tmake_file="${tmake_file} t-slibgcc-libgcc t-slibgcc-nolc-override" + md_unwind_header=arc64/linux-unwind.h + ;; +arc64-*-linux*) + tmake_file="${tmake_file} arc64/t-arc64 arc64/t-softfp t-softfp t-softfp-sfdf" + tmake_file="${tmake_file} 
t-slibgcc-libgcc t-slibgcc-nolc-override" + md_unwind_header=arc64/linux-unwind.h + ;; arm-wrs-vxworks7*) tmake_file="$tmake_file arm/t-arm arm/t-elf arm/t-bpabi arm/t-vxworks7" tmake_file="$tmake_file t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" diff --git a/libgcc/config/arc64/lib2funcs.c b/libgcc/config/arc64/lib2funcs.c new file mode 100644 index 0000000000000..55f0993d0b37d --- /dev/null +++ b/libgcc/config/arc64/lib2funcs.c @@ -0,0 +1,151 @@ +/* libgcc routines for ARC64 + Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + + +typedef int sint64_t __attribute__ ((mode (DI))); +typedef unsigned int uint64_t __attribute__ ((mode (DI))); +typedef int word_t __attribute__ ((mode (__word__))); + + +sint64_t __muldi3 (sint64_t, sint64_t); +sint64_t __divdi3 (sint64_t, sint64_t); +sint64_t __moddi3 (sint64_t, sint64_t); + +sint64_t +__muldi3 (sint64_t a, sint64_t b) +{ + sint64_t res = 0; + uint64_t cnt = a; + + while (cnt) + { + if (cnt & 1) + res += b; + b <<= 1; + cnt >>= 1; + } + return res; +} + +/* Unsigned integer division/modulus. */ + +static inline __attribute__ ((__always_inline__)) +uint64_t +udivmoddi4 (uint64_t num, uint64_t den, word_t modwanted) +{ + uint64_t bit = 1; + uint64_t res = 0; + + while (den < num && bit && !(den & (1LL << 63))) + { + den <<= 1; + bit <<= 1; + } + while (bit) + { + if (num >= den) + { + num -= den; + res |= bit; + } + bit >>= 1; + den >>= 1; + } + if (modwanted) + return num; + return res; +} + +sint64_t +__divdi3 (sint64_t a, sint64_t b) +{ + word_t neg = 0; + sint64_t res; + + if (a < 0) + { + a = -a; + neg = !neg; + } + + if (b < 0) + { + b = -b; + neg = !neg; + } + + res = udivmoddi4 (a, b, 0); + + if (neg) + res = -res; + + return res; +} + +sint64_t +__moddi3 (sint64_t a, sint64_t b) +{ + word_t neg = 0; + sint64_t res; + + if (a < 0) + { + a = -a; + neg = 1; + } + + if (b < 0) + b = -b; + + res = udivmoddi4 (a, b, 1); + + if (neg) + res = -res; + + return res; +} + +uint64_t +__udivdi3 (uint64_t a, uint64_t b) +{ + return udivmoddi4 (a, b, 0); +} + +uint64_t +__umoddi3 (uint64_t a, uint64_t b) +{ + return udivmoddi4 (a, b, 1); +} + +/* We need 32bit version for some of the functions defined in + libgcc2.c. */ +#define LIBGCC2_UNITS_PER_WORD 4 + +#define L_clzsi2 +#define L_ctzsi2 +#define L_ffssi2 +#define L_paritysi2 +#define L_popcountsi2 + +#include "libgcc2.c" diff --git a/libgcc/config/arc64/linux-unwind.h b/libgcc/config/arc64/linux-unwind.h new file mode 100644 index 0000000000000..359f4c634d46a --- /dev/null +++ b/libgcc/config/arc64/linux-unwind.h @@ -0,0 +1,144 @@ +/* DWARF2 EH unwinding support for ARC64 Linux. 
+ Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License + and a copy of the GCC Runtime Library Exception along with this + program; see the files COPYING3 and COPYING.RUNTIME respectively. + If not, see . */ + +#ifndef inhibit_libc +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +#include +#include + +/* The corresponding index in "reg_offset_map". */ +enum reg_id { + REG_RET = 5 +}; + +#define SKIP (-1) + +/* This order is defined by a structure in the kernel, in file + arch/arc/kernel/signal.c. */ + +const int +reg_offset_map[] = { + SKIP, /* bta */ + SKIP, /* lp_start */ + SKIP, /* lp_end */ + SKIP, /* lp_count */ + SKIP, /* status32 */ + SKIP, /* ret */ + 31, /* blink */ + 27, /* fp */ + 26, /* gp */ + 12, /* r12 */ + 11, /* r11 */ + 10, /* r10 */ + 9, /* r9 */ + 8, /* r8 */ + 7, /* r7 */ + 6, /* r6 */ + 5, /* r5 */ + 4, /* r4 */ + 3, /* r3 */ + 2, /* r2 */ + 1, /* r1 */ + 0, /* r0 */ + 28 /* sp */ +}; + +const size_t +reg_offset_map_size = sizeof (reg_offset_map) / sizeof (reg_offset_map[0]); + +#define MOV_R8_139 0x12c2208a +#define TRAP_S_0 0x781e +#define J_S_BLINK 0x7ee0 + +#define MD_FALLBACK_FRAME_STATE_FOR arc_fallback_frame_state + +static __attribute__((noinline)) _Unwind_Reason_Code +arc_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + /* The kernel creates an rt_sigframe on the stack immediately prior + to delivering a signal. + + This structure must have the same shape as the linux kernel + equivalent. */ + struct rt_sigframe { + siginfo_t info; + ucontext_t uc; + unsigned int sigret_magic; + }; + + struct rt_sigframe *rt_; + u_int16_t *pc = (u_int16_t *) context->ra; + struct sigcontext *sc; + _Unwind_Ptr new_cfa; + size_t i; + + + /* A signal frame will have a return address pointing to + __default_sa_restorer. 
This code is hardwired as: + + <__default_rt_sa_restorer>: + 208a 12c2 mov r8,139 + 781e trap_s 0 + 7ee0 j_s [blink] + */ + if (pc[0] != (u_int16_t)MOV_R8_139 || pc[1] != (u_int16_t)(MOV_R8_139 >> 16) + || pc[2] != TRAP_S_0 || pc[3] != J_S_BLINK) + return _URC_END_OF_STACK; + + rt_ = context->cfa; + sc = (struct sigcontext *)&rt_->uc.uc_mcontext; + + new_cfa = (_Unwind_Ptr)sc; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = __LIBGCC_STACK_POINTER_REGNUM__; + fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr)context->cfa; + + unsigned long *regs = &sc->regs.scratch.bta; + for (i = 0; i < reg_offset_map_size; ++i) + { + if (reg_offset_map[i] == SKIP) + continue; + fs->regs.reg[reg_offset_map[i]].how = REG_SAVED_OFFSET; + fs->regs.reg[reg_offset_map[i]].loc.offset + = ((_Unwind_Ptr)&(regs[i])) - new_cfa; + } + + fs->signal_frame = 1; + fs->retaddr_column = __LIBGCC_DWARF_ALT_FRAME_RETURN_COLUMN__; + fs->regs.reg[fs->retaddr_column].how = REG_SAVED_VAL_OFFSET; + fs->regs.reg[fs->retaddr_column].loc.offset = + ((_Unwind_Ptr) (regs[REG_RET])) - new_cfa; + + return _URC_NO_REASON; +} + +#endif /* ifndef inhibit_libc */ + +/* TODO: There was once an arc_frob_update_context () dwelling here. + Check if it is still needed. "cleanup" tests are fine without it. + glibc tests (nptl/tst-* and debug/tst-backtrace*) should shed more + light on it. */ diff --git a/libgcc/config/arc64/sfp-machine.h b/libgcc/config/arc64/sfp-machine.h new file mode 100644 index 0000000000000..59c2d89619122 --- /dev/null +++ b/libgcc/config/arc64/sfp-machine.h @@ -0,0 +1,84 @@ +#ifdef __ARC64_ARCH32__ + +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 + +#else + +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long long +#define _FP_WS_TYPE signed long long +#define _FP_I_TYPE long long + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_H _FP_QNANBIT_H +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0 +#endif /* !__ARC64_ARC32__ */ + +#ifdef __ARC64_ARCH64__ +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); +#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype)) +#endif + +/* The type of the result of a floating point comparison. This must + match __libgcc_cmp_return__ in GCC for the target. 
*/ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#define _FP_NANSIGN_H 0 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 0 +#define _FP_QNANNEGATEDP 0 + +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + R##_s = _FP_NANSIGN_##fs; \ + _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs); \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Not checked. */ +#define _FP_TININESS_AFTER_ROUNDING 0 + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +# define __BYTE_ORDER __LITTLE_ENDIAN + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); diff --git a/libgcc/config/arc64/t-arc64 b/libgcc/config/arc64/t-arc64 new file mode 100644 index 0000000000000..aeb982ec088b2 --- /dev/null +++ b/libgcc/config/arc64/t-arc64 @@ -0,0 +1,21 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC + +# Copyright (C) 2019 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# . + +LIB2ADD += $(srcdir)/config/arc64/lib2funcs.c + diff --git a/libgcc/config/arc64/t-softfp b/libgcc/config/arc64/t-softfp new file mode 100644 index 0000000000000..4110a0b22a432 --- /dev/null +++ b/libgcc/config/arc64/t-softfp @@ -0,0 +1,7 @@ +include $(srcdir)/config/arc64/t-softfp32 + +ARC_ARCH32:=$(findstring __ARC64_ARCH32__,$(shell $(gcc_compile_bare) $(MULTILIB_CFLAGS) -dM -E -