();
+ return machine;
+}
+
+static tree
+arc64_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (id < ARC64_BUILTIN_COUNT)
+ return arc_bdesc[id].fndecl;
+
+ return error_mark_node;
+}
+
+/* Transform UP into lowercase and write the result to LO.
+ You must provide enough space for LO. Return LO. */
+
+static char*
+arc64_tolower (char *lo, const char *up)
+{
+ char *lo0 = lo;
+
+ for (; *up; up++, lo++)
+ *lo = TOLOWER (*up);
+
+ *lo = '\0';
+
+ return lo0;
+}
+
+/* Helper for adding the builtins. */
+static void
+arc64_init_builtins (void)
+{
+ tree void_ftype_usint_usint
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree usint_ftype_usint
+ = build_function_type_list (long_unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree void_ftype_void
+ = build_function_type_list (void_type_node, NULL_TREE);
+ tree void_ftype_usint
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree long_ftype_long
+ = build_function_type_list (long_long_integer_type_node,
+ long_long_integer_type_node, NULL_TREE);
+
+ tree void_ftype_long_long
+ = build_function_type_list (void_type_node, long_long_integer_type_node,
+ long_long_integer_type_node, NULL_TREE);
+
+ /* Add the builtins. */
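+ /* For illustration only: assuming builtins.def contained a (hypothetical)
+ entry such as DEF_BUILTIN (NOP, 0, void_ftype_void, CODE_FOR_nopv, 1),
+ the block below would register "__builtin_arc_nop" with the given
+ function type whenever the MASK expression evaluates to true. */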
+#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
+ { \
+ int id = ARC64_BUILTIN_ ## NAME; \
+ const char *Name = "__builtin_arc_" #NAME; \
+ char *name = (char*) alloca (1 + strlen (Name)); \
+ \
+ gcc_assert (id < ARC64_BUILTIN_COUNT); \
+ if (MASK) \
+ arc_bdesc[id].fndecl \
+ = add_builtin_function (arc64_tolower(name, Name), TYPE, id, \
+ BUILT_IN_MD, NULL, NULL_TREE); \
+ }
+#include "builtins.def"
+#undef DEF_BUILTIN
+}
+
+/* Helper for arc64_expand_builtin; generates a pattern for the given icode
+ and arguments. */
+
+static rtx_insn *
+apply_GEN_FCN (enum insn_code icode, rtx *arg)
+{
+ switch (insn_data[icode].n_generator_args)
+ {
+ case 0:
+ return GEN_FCN (icode) ();
+ case 1:
+ return GEN_FCN (icode) (arg[0]);
+ case 2:
+ return GEN_FCN (icode) (arg[0], arg[1]);
+ case 3:
+ return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
+ case 4:
+ return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
+ case 5:
+ return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+arc64_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int id = DECL_FUNCTION_CODE (fndecl);
+ const struct arc64_builtin_description *d = &arc_bdesc[id];
+ int i, j, n_args = call_expr_nargs (exp);
+ rtx pat = NULL_RTX;
+ rtx xop[5];
+ enum insn_code icode = d->icode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ int nonvoid;
+ tree arg0;
+ rtx op0;
+
+ if (id >= ARC64_BUILTIN_COUNT)
+ internal_error ("bad builtin fcode");
+
+ /* 1st part: Expand special builtins. */
+ switch (id)
+ {
+ case ARC64_BUILTIN_NOP:
+ emit_insn (gen_nopv ());
+ return NULL_RTX;
+
+ case ARC64_BUILTIN_BRK:
+ gcc_assert (icode != 0);
+ emit_insn (GEN_FCN (icode) (const1_rtx));
+ return NULL_RTX;
+
+ case ARC64_BUILTIN_TRAP_S:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ fold (arg0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ gcc_assert (icode != 0);
+ emit_insn (GEN_FCN (icode) (op0));
+ return NULL_RTX;
+ default:
+ break;
+ }
+
+ /* 2nd part: Expand regular builtins. */
+ if (icode == 0)
+ internal_error ("bad builtin fcode");
+
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+ j = 0;
+
+ if (nonvoid)
+ {
+ if (target == NULL_RTX
+ || GET_MODE (target) != tmode
+ || !insn_data[icode].operand[0].predicate (target, tmode))
+ {
+ target = gen_reg_rtx (tmode);
+ }
+ xop[j++] = target;
+ }
+
+ gcc_assert (n_args <= 4);
+ for (i = 0; i < n_args; i++, j++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ machine_mode mode = insn_data[icode].operand[j].mode;
+ rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
+ machine_mode opmode = GET_MODE (op);
+
+ if (CONST_INT_P (op))
+ opmode = mode;
+
+ if ((opmode == SImode) && (mode == HImode))
+ {
+ opmode = HImode;
+ op = gen_lowpart (HImode, op);
+ }
+
+ /* In case the insn wants input operands in modes different from
+ the result, abort. */
+ gcc_assert (opmode == mode || opmode == VOIDmode);
+
+ if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
+ op = copy_to_mode_reg (mode, op);
+
+ xop[j] = op;
+ }
+
+ pat = apply_GEN_FCN (icode, xop);
+ if (pat == NULL_RTX)
+ return NULL_RTX;
+
+ emit_insn (pat);
+
+ if (nonvoid)
+ return target;
+ else
+ return const0_rtx;
+}
+
+/* A callback for the hw-doloop pass. Called when a loop we have discovered
+ turns out not to be optimizable; we have to split the loop_end pattern into
+ a subtract and a test. */
+
+static void
+hwloop_fail (hwloop_info loop)
+{
+ rtx test;
+ rtx insn;
+
+ if (TARGET_64BIT)
+ emit_insn_before (gen_adddi_cmp0 (loop->iter_reg,
+ loop->iter_reg,
+ constm1_rtx),
+ loop->loop_end);
+ else
+ emit_insn_before (gen_addsi_cmp0 (loop->iter_reg,
+ loop->iter_reg,
+ constm1_rtx),
+ loop->loop_end);
+
+ test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REGNUM), const0_rtx);
+ test = gen_rtx_IF_THEN_ELSE (VOIDmode, test,
+ gen_rtx_LABEL_REF (Pmode, loop->start_label),
+ pc_rtx);
+ insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test),
+ loop->loop_end);
+
+ JUMP_LABEL (insn) = loop->start_label;
+ LABEL_NUSES (loop->start_label)++;
+ delete_insn (loop->loop_end);
+}
+
+/* Optimize LOOP. Here we only check that the loop body has a suitable
+ length. Return true if successful, false if the loop should be
+ marked bad. If we return false, the FAIL function is called. */
+
+static bool
+hwloop_optimize (hwloop_info loop)
+{
+ unsigned int length;
+
+ /* Call shorten_branches to calculate the insn lengths. */
+ shorten_branches (get_insns());
+
+ if (!INSN_ADDRESSES_SET_P ())
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has an unknown length\n", loop->loop_no);
+ return false;
+ }
+
+ length = INSN_ADDRESSES (INSN_UID (loop->loop_end))
+ - INSN_ADDRESSES (INSN_UID (loop->start_label));
+ loop->length = length;
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d with length %d\n", loop->loop_no,
+ loop->length);
+ if (loop->length > MAX_LOOP_LENGTH
+ || loop->length < MIN_LOOP_LENGTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is too long\n", loop->loop_no);
+ return false;
+ }
+ if (loop->length == 0)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is empty\n", loop->loop_no);
+ return false;
+ }
+
+ return true;
+}
+
+/* A callback for the hw-doloop pass. This function examines INSN; if
+ it is a loop_end pattern we recognize, return the reg rtx for the
+ loop counter. Otherwise, return NULL_RTX. */
+
+static rtx
+hwloop_pattern_reg (rtx_insn *insn)
+{
+ rtx reg;
+
+ if (!JUMP_P (insn)
+ || (TARGET_64BIT && (recog_memoized (insn) != CODE_FOR_dbnzdi))
+ || (!TARGET_64BIT && (recog_memoized (insn) != CODE_FOR_dbnzsi)))
+ return NULL_RTX;
+
+ reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
+ if (!REG_P (reg))
+ return NULL_RTX;
+ return reg;
+}
+
+static struct hw_doloop_hooks arc64_doloop_hooks =
+{
+ hwloop_pattern_reg,
+ hwloop_optimize,
+ hwloop_fail
+};
+
+/* Machine specific reorg step. */
+static void
+arc64_reorg (void)
+{
+ compute_bb_for_insn ();
+ df_analyze ();
+ reorg_loops (true, &arc64_doloop_hooks);
+
+ /* Search for MAC instructions and remove the superfluous move from
+ the accumulator to a register. Hence, we try to repair what we do
+ in the madd expanders or in the mac* splits. */
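+ /* Illustrative example (register names made up): a MAC whose only visible
+ result is the accumulator r58, followed by
+ movl rX, r58
+ is rewritten below into a single MAC that writes rX directly, and both
+ original instructions are deleted. */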
+ for (rtx_insn *insn = get_insns (); insn; insn = next_real_insn (insn))
+ {
+ rtx op0, op1, op2, tmp;
+ enum insn_code icode = CODE_FOR_nothing;
+ machine_mode mode = E_VOIDmode;
+
+ if (!INSN_P (insn))
+ continue;
+
+ /* First, find a MAC instruction whose only output is the
+ accumulator. */
+ switch (INSN_CODE (insn))
+ {
+ case CODE_FOR_umachi0:
+ icode = CODE_FOR_umachi;
+ mode = E_SImode;
+ break;
+
+ case CODE_FOR_machi0:
+ icode = CODE_FOR_machi;
+ mode = E_SImode;
+ break;
+
+ case CODE_FOR_umacd0:
+ icode = CODE_FOR_umacd;
+ mode = E_DImode;
+ break;
+
+ case CODE_FOR_macd0:
+ icode = CODE_FOR_macd;
+ mode = E_DImode;
+ break;
+
+ case CODE_FOR_macsi0:
+ icode = CODE_FOR_macsi;
+ mode = E_SImode;
+ break;
+
+ case CODE_FOR_dmach0:
+ icode = CODE_FOR_dmach;
+ mode = E_HImode;
+ break;
+
+ default:
+ continue;
+ }
+
+ gcc_assert (REGNO (SET_DEST (PATTERN (insn))) == R58_REGNUM);
+ rtx_insn *nxt = next_real_insn (insn);
+
+ /* Second, check whether the next instruction is a move. */
+ tmp = PATTERN (nxt);
+ if (GET_CODE (tmp) != SET
+ || (GET_CODE (SET_SRC (tmp)) != REG)
+ || (GET_CODE (SET_DEST (tmp)) != REG))
+ continue;
+
+ op0 = SET_DEST (tmp);
+ op1 = SET_SRC (tmp);
+ if (REGNO (op1) != R58_REGNUM)
+ continue;
+
+ /* Make the new MAC instruction. */
+ switch (INSN_CODE (insn))
+ {
+ case CODE_FOR_umachi0:
+ case CODE_FOR_umacd0:
+ case CODE_FOR_machi0:
+ case CODE_FOR_macd0:
+ if (!TARGET_64BIT && ((REGNO (op0) & 1) != 0))
+ continue;
+ tmp = SET_SRC (PATTERN (insn));
+ op1 = XEXP (XEXP (XEXP (tmp, 0), 0), 0);
+ op2 = XEXP (XEXP (XEXP (tmp, 0), 1), 0);
+ break;
+
+ case CODE_FOR_dmach0:
+ case CODE_FOR_macsi0:
+ tmp = SET_SRC (PATTERN (insn));
+ op1 = XEXP (XEXP (tmp, 0), 0);
+ op2 = XEXP (XEXP (tmp, 0), 1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn_before (GEN_FCN (icode) (op0, op1, op2,
+ gen_rtx_REG (mode, R58_REGNUM)),
+ insn);
+
+ /* Remove the old MAC and MOV instruction. */
+ set_insn_deleted (insn);
+ set_insn_deleted (nxt);
+ }
+}
+
+/* Expand a compare and swap pattern. */
+
+static void
+emit_unlikely_jump (rtx insn)
+{
+ rtx_insn *jump = emit_jump_insn (insn);
+ add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+}
+
+/* Expand code to perform an 8- or 16-bit compare and swap by doing a
+ 32-bit compare and swap on the word containing the byte or
+ half-word. The difference between a weak and a strong CAS is that
+ the weak version may simply fail. The strong version relies on two
+ loops: one checks whether the SCOND op succeeded, the other checks
+ that the accessed 32-bit location containing the 8- or 16-bit datum
+ was not changed by another thread. The first loop is implemented by
+ the atomic_compare_and_swap{si,di}_1 patterns. The second loop is
+ implemented by this routine. */
+
+static void
+arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
+ rtx oldval, rtx newval, rtx weak,
+ rtx mod_s, rtx mod_f)
+{
+ rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
+ rtx addr = gen_reg_rtx (Pmode);
+ rtx off = gen_reg_rtx (SImode);
+ rtx oldv = gen_reg_rtx (SImode);
+ rtx newv = gen_reg_rtx (SImode);
+ rtx oldvalue = gen_reg_rtx (SImode);
+ rtx newvalue = gen_reg_rtx (SImode);
+ rtx res = gen_reg_rtx (SImode);
+ rtx resv = gen_reg_rtx (SImode);
+ rtx memsi, val, mask, end_label, loop_label, cc, x;
+ machine_mode mode;
+ bool is_weak = (weak != const0_rtx);
+
+ /* Truncate the address. */
+ emit_insn (gen_rtx_SET (addr,
+ gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
+
+ /* Compute the datum offset. */
+
+ emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode,
+ gen_lowpart(SImode, addr1),
+ GEN_INT (3))));
+
+ /* Normal read from truncated address. */
+ memsi = gen_rtx_MEM (SImode, addr);
+ set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
+
+ val = copy_to_reg (memsi);
+
+ /* Convert the offset to bits. */
+ emit_insn (gen_rtx_SET (off,
+ gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
+
+ /* Get the proper mask. */
+ if (GET_MODE (mem) == QImode)
+ mask = force_reg (SImode, GEN_INT (0xff));
+ else
+ mask = force_reg (SImode, GEN_INT (0xffff));
+
+ emit_insn (gen_rtx_SET (mask,
+ gen_rtx_ASHIFT (SImode, mask, off)));
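+ /* Worked example (illustrative): for a QImode access whose address has
+ low bits 2, off is 2*8 = 16 and mask becomes 0xff << 16, i.e. the
+ byte of interest sits in bits 16..23 of the aligned 32-bit word. */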
+
+ /* Prepare the old and new values. */
+ emit_insn (gen_rtx_SET (val,
+ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+ val)));
+
+ oldval = gen_lowpart (SImode, oldval);
+ emit_insn (gen_rtx_SET (oldv,
+ gen_rtx_ASHIFT (SImode, oldval, off)));
+
+ newval = gen_lowpart_common (SImode, newval);
+ emit_insn (gen_rtx_SET (newv,
+ gen_rtx_ASHIFT (SImode, newval, off)));
+
+ emit_insn (gen_rtx_SET (oldv,
+ gen_rtx_AND (SImode, oldv, mask)));
+
+ emit_insn (gen_rtx_SET (newv,
+ gen_rtx_AND (SImode, newv, mask)));
+
+ if (!is_weak)
+ {
+ end_label = gen_label_rtx ();
+ loop_label = gen_label_rtx ();
+ emit_label (loop_label);
+ }
+
+ /* Make the old and new values. */
+ emit_insn (gen_rtx_SET (oldvalue,
+ gen_rtx_IOR (SImode, oldv, val)));
+
+ emit_insn (gen_rtx_SET (newvalue,
+ gen_rtx_IOR (SImode, newv, val)));
+
+ /* Try a 32-bit atomic compare and swap. It clobbers the CC
+ register. */
+ if (GET_MODE (mem) == SImode)
+ emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
+ weak, mod_s, mod_f));
+ else /* DImode */
+ emit_insn (gen_atomic_compare_and_swapdi_1 (res, memsi, oldvalue, newvalue,
+ weak, mod_s, mod_f));
+
+ /* Regardless of the weakness of the operation, a proper boolean
+ result needs to be provided. */
+ x = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_EQ (SImode, x, const0_rtx);
+ emit_insn (gen_rtx_SET (bool_result, x));
+
+ if (!is_weak)
+ {
+ /* Check the result: if the atomic op succeeded, then jump to
+ the end label. */
+ x = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+
+ /* Wait for the right moment when the accessed 32-bit location
+ is stable. */
+ emit_insn (gen_rtx_SET (resv,
+ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+ res)));
+ mode = SELECT_CC_MODE (NE, resv, val);
+ cc = gen_rtx_REG (mode, CC_REGNUM);
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
+
+ /* Set the new value of the 32-bit location, properly masked. */
+ emit_insn (gen_rtx_SET (val, resv));
+
+ /* Try again if the location is unstable. Fall through if only
+ the scond op failed. */
+ x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+ emit_label (end_label);
+ }
+
+ /* End: properly return the result for the given mode. */
+ emit_insn (gen_rtx_SET (res,
+ gen_rtx_AND (SImode, res, mask)));
+
+ emit_insn (gen_rtx_SET (res,
+ gen_rtx_LSHIFTRT (SImode, res, off)));
+
+ emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
+}
+
+
+/* This hook may conditionally modify five variables: fixed_regs,
+ call_used_regs, global_regs, reg_names and reg_class_contents. */
+
+static void
+arc64_conditional_register_usage (void)
+{
+ int regno;
+
+ /* When floating point is available, allow the FP registers to be used by
+ the compiler and mark the appropriate ones as call-used (i.e., f0-f15). */
+ if (ARC64_HAS_FP_BASE)
+ {
+ for (regno = F0_REGNUM; regno <= F31_REGNUM; regno++)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = (regno < F16_REGNUM) ? 1 : 0;
+ }
+ }
+}
+
+/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
+ if MODE is HFmode, and punt to the generic implementation otherwise. */
+
+static bool
+arc64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
+{
+ return (mode == HFmode
+ ? ARC64_HAS_FPUH
+ : default_libgcc_floating_mode_supported_p (mode));
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
+ if MODE is HFmode, and punt to the generic implementation otherwise. */
+
+static bool
+arc64_scalar_mode_supported_p (scalar_mode mode)
+{
+ return (mode == HFmode
+ ? ARC64_HAS_FPUH
+ : default_scalar_mode_supported_p (mode));
+}
+
+/* Implements target hook vector_mode_supported_p. */
+
+static bool
+arc64_vector_mode_supported_p (machine_mode mode)
+{
+ switch (mode)
+ {
+ /* 32-bit fp SIMD vectors. */
+ case E_V2HFmode:
+ return ARC64_VFP_32;
+ /* 64-bit fp SIMD vectors. */
+ case E_V4HFmode:
+ case E_V2SFmode:
+ return ARC64_VFP_64;
+ /* 128-bit fp SIMD vectors. */
+ case E_V8HFmode:
+ case E_V4SFmode:
+ case E_V2DFmode:
+ return ARC64_VFP_128;
+
+ /* 32-bit SIMD vectors. */
+ case E_V2HImode:
+ /* 64-bit SIMD vectors. */
+ case E_V4HImode:
+ case E_V2SImode:
+ return TARGET_SIMD;
+
+ default:
+ return false;
+ }
+}
+
+/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
+
+static machine_mode
+arc64_preferred_simd_mode (scalar_mode mode)
+{
+ switch (mode)
+ {
+ case E_HFmode:
+ if (ARC64_VFP_128)
+ return V8HFmode;
+ if (ARC64_VFP_64)
+ return V4HFmode;
+ if (ARC64_VFP_32)
+ return V2HFmode;
+ return word_mode;
+
+ case E_SFmode:
+ if (ARC64_VFP_128)
+ return V4SFmode;
+ if (ARC64_VFP_64)
+ return V2SFmode;
+ return word_mode;
+
+ case E_DFmode:
+ if (ARC64_VFP_128)
+ return V2DFmode;
+ return word_mode;
+
+ case E_HImode:
+ return TARGET_SIMD ? V4HImode : word_mode;
+ case E_SImode:
+ return TARGET_SIMD ? V2SImode : word_mode;
+
+ default:
+ return word_mode;
+ }
+}
+
+/* Implements target hook
+ TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
+
+static unsigned int
+arc64_autovectorize_vector_modes (vector_modes *modes, bool)
+{
+ if (ARC64_VFP_128)
+ {
+ modes->quick_push (V8HFmode);
+ modes->quick_push (V4SFmode);
+ modes->quick_push (V2DFmode);
+ }
+ else if (ARC64_VFP_64)
+ {
+ modes->quick_push (V4HFmode);
+ modes->quick_push (V2SFmode);
+ }
+ else if (ARC64_VFP_32)
+ modes->quick_push (V2HFmode);
+
+ if (TARGET_SIMD)
+ {
+ modes->quick_push (V4HImode);
+ modes->quick_push (V2SImode);
+ }
+ return 0;
+}
+
+/* Vectorization costs. */
+static int
+arc64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return 1;
+
+ case scalar_load:
+ return 1;
+
+ case scalar_store:
+ return 1;
+
+ case vector_stmt:
+ return 1; /* fp operations are more efficient than int. */
+
+ case vector_load:
+ return 1;
+
+ case vector_store:
+ return 1;
+
+ case vec_to_scalar:
+ return 1; /* We have extract instructions. */
+
+ case scalar_to_vec:
+ return 1; /* fp is more efficient than int. */
+
+ case unaligned_load:
+ case vector_gather_load:
+ return 1; /* Maybe I need to reflect unaligned flag here. */
+
+ case unaligned_store:
+ case vector_scatter_store:
+ return 1; /* Likewise. */
+
+ case cond_branch_taken:
+ return 3; /* A jump is always expensive. */
+
+ case cond_branch_not_taken:
+ return 1;
+
+ case vec_perm:
+ return 1; /* We don't really have vec_perm. */
+
+ case vec_promote_demote:
+ return 1;
+
+ case vec_construct:
+ elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+ return elements / 2;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return a new RTX holding the result of moving POINTER forward by
+ AMOUNT bytes. */
+
+static rtx
+arc64_move_pointer (rtx pointer, poly_int64 amount)
+{
+ rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
+
+ return adjust_automodify_address (pointer, GET_MODE (pointer),
+ next, amount);
+}
+
+/* Return a new RTX holding the result of moving POINTER forward by the
+ size of the mode it points to. */
+
+static rtx
+arc64_progress_pointer (rtx pointer)
+{
+ return arc64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
+}
+
+/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
+ MODE bytes. */
+
+static void
+arc64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
+ machine_mode mode)
+{
+ rtx reg = gen_reg_rtx (mode);
+
+ /* "Cast" the pointers to the correct mode. */
+ *src = adjust_address (*src, mode, 0);
+ *dst = adjust_address (*dst, mode, 0);
+ /* Emit the memcpy. */
+ emit_move_insn (reg, *src);
+ emit_move_insn (*dst, reg);
+ /* Move the pointers forward. */
+ *src = arc64_progress_pointer (*src);
+ *dst = arc64_progress_pointer (*dst);
+}
+
+/* Moving f regs to r regs is not a very good idea. */
+static int
+arc64_register_move_cost (machine_mode,
+ reg_class_t from_class, reg_class_t to_class)
+{
+ if ((from_class == FP_REGS && to_class == GENERAL_REGS)
+ || (to_class == FP_REGS && from_class == GENERAL_REGS))
+ return 200;
+ return 2;
+}
+
+/* Check/emit vector duplicate instructions. */
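+/* For example (illustrative), a permutation selector of {2, 2, 2, 2} on a
+ V4HI input broadcasts lane 2 into every output lane; the routine below
+ matches such single-lane selectors (with a non-zero lane index). */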
+
+static bool
+arc64_simd_dup (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ HOST_WIDE_INT elt;
+ rtx t0, parallel, select;
+ rtx in0 = d->op0;
+ rtx out = d->target;
+
+ if (!TARGET_64BIT
+ || !d->one_vector_p
+ || vmode == E_V2HImode
+ || d->perm.encoding ().encoded_nelts () != 1
+ || !d->perm[0].is_constant (&elt)
+ /* If elt is zero, the vec_dup pattern does as well as we would here. */
+ || elt == 0)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ switch (vmode)
+ {
+ case E_V8HFmode:
+ case E_V4HFmode:
+ case E_V2HFmode:
+ case E_V2SFmode:
+ case E_V4SFmode:
+ if (elt != 0)
+ {
+ t0 = gen_reg_rtx (GET_MODE_INNER (vmode));
+ parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, GEN_INT (elt)));
+ select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
+ emit_set_insn (t0, select);
+ emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, t0));
+ return true;
+ }
+
+ /* FALLTHRU */
+ case E_V2DFmode:
+ case E_V2SImode:
+ parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, GEN_INT (elt)));
+ select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
+ emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
+ return true;
+
+ case E_V4HImode:
+ if (elt == 0)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t0));
+ return true;
+ }
+ else if (elt == 1)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane3_1v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t0));
+ return true;
+ }
+ else if (elt == 2)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t0));
+ return true;
+ }
+ else if (elt == 3)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane3_1v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t0));
+ return true;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+}
+
+/* Recognize VPACK instructions. */
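+/* For example (illustrative), with two V4HI inputs B and C, a selector of
+ {0, 2, 4, 6} picks the even lanes of both vectors and {1, 3, 5, 7} picks
+ the odd lanes, which map onto the lane-select patterns used below. */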
+
+static bool
+arc64_simd_vpack (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
+ rtx out, in0, in1;
+ machine_mode vmode = d->vmode;
+
+ if (FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd != 0 && odd != 1)
+ || !d->perm.series_p (0, 1, odd, 2)
+ || !d->perm.series_p (2, 1, nelt + odd, 2))
+ return false;
+
+ switch (vmode)
+ {
+ case E_V2SImode:
+ case E_V4HImode:
+ if (!TARGET_64BIT)
+ return false;
+ break;
+
+ case E_V2HImode:
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ out = d->target;
+ switch (vmode)
+ {
+ case E_V4HImode:
+ if (odd)
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, in0, in1));
+ else
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, in0, in1));
+ break;
+
+ case E_V2SImode:
+ if (odd)
+ emit_insn (gen_arc64_sel_lane1_v2si (out, in0, in1));
+ else
+ emit_insn (gen_arc64_sel_lane0_v2si (out, in0, in1));
+ break;
+
+ case E_V2HImode:
+ if (odd)
+ emit_insn (gen_arc64_sel_lane1_v2hi (out, in0, in1));
+ else
+ emit_insn (gen_arc64_sel_lane0_v2hi (out, in0, in1));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return true;
+}
+
+/* Reverse vector, recognize swapl and vfexch instructions. */
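+/* For example (illustrative), reversing a V4HI vector corresponds to the
+ selector {3, 2, 1, 0}; the checks below accept exactly such descending
+ single-input series. */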
+
+static bool
+arc64_simd_swapl (struct e_vec_perm_d *d)
+{
+ poly_uint64 nelt = d->perm.length ();
+ machine_mode vmode = d->vmode;
+ rtx t0, t1, t2, out, in0;
+ rtx src;
+ unsigned int unspec;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 4
+ || !TARGET_64BIT)
+ return false;
+
+ if (!d->one_vector_p)
+ return false;
+
+ if (!d->perm.series_p (0, 1, nelt - 1, -1))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ out = d->target;
+ t0 = d->target;
+ t1 = d->target;
+
+ switch (vmode)
+ {
+ case E_V4HImode:
+ t0 = gen_reg_rtx (vmode);
+ t1 = gen_reg_rtx (vmode);
+ t2 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_swapl (t0, in0));
+ emit_insn (gen_arc64_swapv4hi (t1, in0));
+ emit_insn (gen_arc64_swapv4hi (t2, t0));
+ emit_insn (gen_arc64_swp_lane0_v4hi (out, t2, t1));
+ break;
+
+ case E_V2SImode:
+ emit_insn (gen_arc64_swaplv2si (out, in0));
+ break;
+
+ case E_V2HImode:
+ emit_insn (gen_arc64_swapv2hi (out, in0));
+ break;
+
+ case E_V8HFmode:
+ t1 = gen_reg_rtx (vmode);
+ /* Fall through. */
+ case E_V4SFmode:
+ t0 = gen_reg_rtx (vmode);
+ /* Fall through. */
+ case E_V2DFmode:
+ unspec = ARC64_UNSPEC_DEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);
+ emit_set_insn (t0, src);
+ if (vmode == E_V2DFmode)
+ return true;
+
+ unspec = ARC64_UNSPEC_SEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, t0), unspec);
+ emit_set_insn (t1, src);
+ if (vmode == E_V4SFmode)
+ return true;
+
+ unspec = ARC64_UNSPEC_HEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, t1), unspec);
+ emit_set_insn (out, src);
+ break;
+
+ case E_V4HFmode:
+ t1 = gen_reg_rtx (vmode);
+ /* Fall through. */
+ case E_V2SFmode:
+ unspec = ARC64_UNSPEC_SEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);
+ emit_set_insn (t1, src);
+ if (vmode == E_V2SFmode)
+ return true;
+ in0 = t1;
+ /* Fall through. */
+
+ case E_V2HFmode:
+ unspec = ARC64_UNSPEC_HEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);
+ emit_set_insn (out, src);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return true;
+}
+
+/* Detect cases when we can use the swap instruction. */
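+/* For example (illustrative), a V4HI selector of {1, 0, 3, 2} swaps the
+ two lanes within each 32-bit pair, which is what the sequence below
+ emits. */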
+
+static bool
+arc64_simd_swap (struct e_vec_perm_d *d)
+{
+ rtx t0, t1, t2, out, in0;
+ machine_mode vmode = d->vmode;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT)
+ return false;
+
+ if (!d->one_vector_p)
+ return false;
+
+ if (!d->perm.series_p (0, 2, 1, 2)
+ || !d->perm.series_p (1, 2, 0, 2))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ out = d->target;
+
+ t0 = gen_reg_rtx (vmode);
+ t1 = gen_reg_rtx (vmode);
+ t2 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_swapl (t0, in0));
+ emit_insn (gen_arc64_swapv4hi (t1, in0));
+ emit_insn (gen_arc64_swapv4hi (t2, t0));
+ emit_insn (gen_arc64_swp_lane0_v4hi (out, t1, t2));
+ return true;
+}
+
+/* Detect cases when we can use vpack2wl for 4-element vectors. */
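+/* For example (illustrative), with V4HI inputs B and C, the selector
+ {0, 1, 4, 5} (or {0, 1, 0, 1} for a single input) keeps the low 32-bit
+ halves of both operands, which is what the pattern below emits. */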
+
+static bool
+arc64_simd_vpack2wl (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT)
+ return false;
+
+ if (d->perm[0] != 0
+ || d->perm[1] != 1
+ || (d->perm[2] != 4 && d->perm[2] != 0)
+ || (d->perm[3] != 5 && d->perm[3] != 1))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ emit_insn (gen_arc64_swp_lane0_v4hi (d->target, d->op0, d->op1));
+ return true;
+}
+
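+/* Likewise for vpack2wm: e.g. (illustrative) the V4HI selector {2, 3, 6, 7}
+ keeps the high 32-bit halves of both operands. */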
+static bool
+arc64_simd_vpack2wm (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT)
+ return false;
+
+ if (d->perm[0] != 2
+ || d->perm[1] != 3
+ || (d->perm[2] != 6 && d->perm[2] != 2)
+ || (d->perm[3] != 7 && d->perm[3] != 3))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ emit_insn (gen_arc64_swp_lane1_v4hi (d->target, d->op0, d->op1));
+ return true;
+}
+
+/* Recognize patterns for {H,S,D}EXCH insns, which reverse elements:
+ VFHEXCH (v2hf): h0 h1
+ VFHEXCH (v4hf): h2 h3 h0 h1
+ VFHEXCH (v8hf): h6 h7 h4 h5 h2 h3 h0 h1
+
+ VFSEXCH (v4hf): h1h0 h3h2
+ VFSEXCH (v8hf): h5h4 h7h6 h1h0 h3h2
+
+ VFDEXCH (v8hf): h3h2h1h0 h7h6h5h4
+
+ VFSEXCH (v2sf): s0 s1
+ VFSEXCH (v4sf): s2 s3 s0 s1
+
+ VFDEXCH (v4sf): s1s0 s3s2
+
+ VFDEXCH (v2df): d0 d1
+ */
+
+static bool
+arc64_simd_exch (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT diff;
+ unsigned int i, size, unspec;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->one_vector_p
+ || !d->perm[0].is_constant (&diff)
+ || !diff)
+ return false;
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = ARC64_UNSPEC_DEXCH;
+ }
+ else if (size == 32)
+ {
+ unspec = ARC64_UNSPEC_SEXCH;
+ }
+ else if (size == 16)
+ {
+ unspec = ARC64_UNSPEC_HEXCH;
+ }
+ else
+ return false;
+
+ switch (diff)
+ {
+ case 1:
+ for (i = 0; i < 2; i++)
+ if (!d->perm.series_p (i, 2, diff - i, 2))
+ return false;
+ break;
+
+ case 2:
+ case 4:
+ for (i = 0; i < diff; i++)
+ if (!d->perm.series_p (i, diff, diff + i, -diff))
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, d->op0), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Recognize FVUNPACKL/FVUNPACKM instructions.
+
+ VFHUNPKL (v2hf): Ch0 Bh0
+ VFHUNPKL (v4hf): Ch2 Ch0 Bh2 Bh0
+ VFHUNPKL (v8hf): Ch6 Ch4 Ch2 Ch0 Bh6 Bh4 Bh2 Bh0
+
+ VFSUNPKL (v4hf): Ch1Ch0 Bh1Bh0
+ VFSUNPKL (v8hf): Ch5Ch4 Ch1Ch0 Bh5Bh4 Bh1Bh0
+
+ VFDUNPKL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0
+
+ VFSUNPKL (v2sf): Cs0 Bs0
+ VFSUNPKL (v4sf): Cs2 Cs0 Bs2 Bs0
+
+ VFDUNPKL (v4sf): Cs1Cs0 Bs1Bs0
+
+ VFDUNPKL (v2df): Cd0 Bd0
+
+ VFHUNPKM (v2hf): Ch1 Bh1
+ VFHUNPKM (v4hf): Ch3 Ch1 Bh3 Bh1
+ VFHUNPKM (v8hf): Ch7 Ch5 Ch3 Ch1 Bh7 Bh5 Bh3 Bh1
+
+ VFSUNPKM (v4hf): Ch3Ch2 Bh3Bh2
+ VFSUNPKM (v8hf): Ch7Ch6 Ch3Ch2 Bh7Bh6 Bh3Bh2
+
+ VFDUNPKM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4
+
+ VFSUNPKM (v2sf): Cs1 Bs1
+ VFSUNPKM (v4sf): Cs3 Cs1 Bs3 Bs1
+
+ VFDUNPKM (v4sf): Cs3Cs2 Bs3Bs2
+
+ VFDUNPKM (v2df): Cd1 Bd1
+ */
+
+static bool
+arc64_simd_unpk (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd, lo;
+ poly_uint64 nelt = d->perm.length ();
+ unsigned int i, j, size, unspec, diff = 0;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd == 3)
+ || (odd < 0 || odd > (HOST_WIDE_INT)(nelt >> 1)))
+ return false;
+
+ /* If ODD is set, then diff == odd. Thus, the below condition should
+ hold. */
+ lo = (odd == 0) ? 1 : odd;
+ for (i = 4; (i >= lo) && (diff == 0); i >>= 1)
+ {
+ bool found = true;
+ for (j = 0; (j < i) && found; j++)
+ if (!d->perm.series_p (j, i, odd + j, i * 2 )
+ || !d->perm.series_p ((nelt >> 1) + j, i, nelt + odd + j, i * 2))
+ found = false;
+ if (found)
+ diff = i;
+ }
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = odd ? ARC64_UNSPEC_DUNPKM : ARC64_UNSPEC_DUNPKL;
+ }
+ else if (size == 32)
+ {
+ unspec = odd ? ARC64_UNSPEC_SUNPKM : ARC64_UNSPEC_SUNPKL;
+ }
+ else if (size == 16)
+ {
+ unspec = odd ? ARC64_UNSPEC_HUNPKM : ARC64_UNSPEC_HUNPKL;
+ }
+ else
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Recognize VFPACKL and VFPACKM instructions.
+
+ VFHPACKL (v2hf): Ch0 Bh0
+ VFHPACKL (v4hf): Ch1 Bh1 Ch0 Bh0
+ VFHPACKL (v8hf): Ch3 Bh3 Ch2 Bh2 Ch1 Bh1 Ch0 Bh0
+
+ VFSPACKL (v4hf): Ch1Ch0 Bh1Bh0
+ VFSPACKL (v8hf): Ch3Ch2 Bh3Bh2 Ch1Ch0 Bh1Bh0
+
+ VFDPACKL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0
+
+ VFSPACKL (v2sf): Cs0 Bs0
+ VFSPACKL (v4sf): Cs1 Bs1 Cs0 Bs0
+
+ VFDPACKL (v4sf): Cs1Cs0 Bs1Bs0
+
+ VFDPACKL (v2df): Cd0 Bd0
+
+
+ VFHPACKM (v2hf): Ch1 Bh1
+ VFHPACKM (v4hf): Ch3 Bh3 Ch2 Bh2
+ VFHPACKM (v8hf): Ch7 Bh7 Ch6 Bh6 Ch5 Bh5 Ch4 Bh4
+
+ VFSPACKM (v4hf): Ch3Ch2 Bh3Bh2
+ VFSPACKM (v8hf): Ch7Ch6 Bh7Bh6 Ch5Ch4 Bh5Bh4
+
+ VFDPACKM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4
+
+ VFSPACKM (v2sf): Cs1 Bs1
+ VFSPACKM (v4sf): Cs3 Bs3 Cs2 Bs2
+
+ VFDPACKM (v4sf): Cs3Cs2 Bs3Bs2
+
+ VFDPACKM (v2df): Cd1 Bd1
+ */
+
+static bool
+arc64_simd_pack (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
+ unsigned int i, j, size, unspec, diff = 0;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd != 0 && odd != (HOST_WIDE_INT)(nelt >> 1)))
+ return false;
+
+ for (i = 4; (i > 0) && (diff == 0); i >>= 1)
+ {
+ bool found = true;
+ for (j = 0; (j < i) && found; j++)
+ if (!d->perm.series_p (j, 2 * i, odd + j, i)
+ || !d->perm.series_p (i + j, 2 * i, nelt + odd + j, i))
+ found = false;
+ if (found)
+ diff = i;
+ }
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = odd ? ARC64_UNSPEC_DPACKM : ARC64_UNSPEC_DPACKL;
+ }
+ else if (size == 32)
+ {
+ unspec = odd ? ARC64_UNSPEC_SPACKM : ARC64_UNSPEC_SPACKL;
+ }
+ else if (size == 16)
+ {
+ unspec = odd ? ARC64_UNSPEC_HPACKM : ARC64_UNSPEC_HPACKL;
+ }
+ else
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Recognize VFBFLYL and VFBFLYM instructions.
+
+ VFHBFLYL (v2hf): Ch0 Bh0
+ VFHBFLYL (v4hf): Ch2 Bh2 Ch0 Bh0
+ VFHBFLYL (v8hf): Ch6 Bh6 Ch4 Bh4 Ch2 Bh2 Ch0 Bh0
+
+ VFSBFLYL (v4hf): Ch1Ch0 Bh1Bh0
+ VFSBFLYL (v8hf): Ch5Ch4 Bh5Bh4 Ch1Ch0 Bh1Bh0
+
+ VFDBFLYL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0
+
+ VFSBFLYL (v2sf): Cs0 Bs0
+ VFSBFLYL (v4sf): Cs2 Bs2 Cs0 Bs0
+
+ VFDBFLYL (v4sf): Cs1Cs0 Bs1Bs0
+
+ VFDBFLYL (v2df): Cd0 Bd0
+
+
+ VFHBFLYM (v2hf): Ch1 Bh1
+ VFHBFLYM (v4hf): Ch3 Bh3 Ch1 Bh1
+ VFHBFLYM (v8hf): Ch7 Bh7 Ch5 Bh5 Ch3 Bh3 Ch1 Bh1
+
+ VFSBFLYM (v4hf): Ch3Ch2 Bh3Bh2
+ VFSBFLYM (v8hf): Ch7Ch6 Bh7Bh6 Ch3Ch2 Bh3Bh2
+
+ VFDBFLYM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4
+
+ VFSBFLYM (v2sf): Cs1 Bs1
+ VFSBFLYM (v4sf): Cs3 Bs3 Cs1 Bs1
+
+ VFDBFLYM (v4sf): Cs3Cs2 Bs3Bs2
+
+ VFDBFLYM (v2df): Cd1 Bd1
+ */
+
+static bool
+arc64_simd_bfly (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
+ unsigned int i, j, size, unspec, diff = 0;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd == 3)
+ || (odd < 0 || odd > (HOST_WIDE_INT)(nelt >> 1)))
+ return false;
+
+ for (i = 4; (i > 0) && (diff == 0); i >>= 1)
+ {
+ bool found = true;
+ for (j = 0; (j < i) && found; j++)
+ if (!d->perm.series_p (j, 2 * i, odd + j, 2 * i)
+ || !d->perm.series_p (i + j, 2 * i, nelt + odd + j, 2 * i))
+ found = false;
+ if (found)
+ diff = i;
+ }
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = odd ? ARC64_UNSPEC_DBFLYM : ARC64_UNSPEC_DBFLYL;
+ }
+ else if (size == 32)
+ {
+ unspec = odd ? ARC64_UNSPEC_SBFLYM : ARC64_UNSPEC_SBFLYL;
+ }
+ else if (size == 16)
+ {
+ unspec = odd ? ARC64_UNSPEC_HBFLYM : ARC64_UNSPEC_HBFLYL;
+ }
+ else
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Implement combination of vpack4hl/vpack4hm instructions. */
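+/* For example (illustrative), with V4HI inputs B and C, the selector
+ {0, 4, 1, 5} interleaves the two low lanes of each input and {2, 6, 3, 7}
+ interleaves the two high lanes; both are built from the lane-select
+ patterns below. */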
+
+static bool
+arc64_simd_lane_pack (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ HOST_WIDE_INT elem;
+ poly_uint64 nelt = d->perm.length ();
+ rtx t0, t1;
+ rtx in0 = d->op0;
+ rtx in1 = d->op1;
+ rtx out = d->target;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT
+ || !d->perm[0].is_constant (&elem)
+ || (elem != 0 && elem != 2)
+ || !d->perm.series_p (0, 2, elem, 1)
+ || !d->perm.series_p (1, 2, elem + nelt, 1))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ t0 = gen_reg_rtx (vmode);
+ t1 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in1));
+ emit_insn (gen_arc64_sel_lane3_1v4hi (t1, in0, in1));
+ if (elem == 0)
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t1));
+ else
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t1));
+ return true;
+}
+
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
+
+static bool
+arc64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
+{
+ struct e_vec_perm_d d;
+
+ /* Check whether the mask can be applied to a single vector. */
+ if (sel.ninputs () == 1
+ || (op0 && rtx_equal_p (op0, op1)))
+ d.one_vector_p = true;
+ else if (sel.all_from_input_p (0))
+ {
+ d.one_vector_p = true;
+ op1 = op0;
+ }
+ else if (sel.all_from_input_p (1))
+ {
+ d.one_vector_p = true;
+ op0 = op1;
+ }
+ else
+ d.one_vector_p = false;
+
+ d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
+ sel.nelts_per_input ());
+ d.vmode = vmode;
+ d.target = target;
+ d.op0 = op0 ? force_reg (vmode, op0) : NULL_RTX;
+ if (op0 == op1)
+ d.op1 = op1;
+ else
+ d.op1 = op1 ? force_reg (vmode, op1) : NULL_RTX;
+ d.testing_p = !target;
+
+ /* The pattern matching functions above are written to look for a small
+ number to begin the sequence (0, 1, N/2). If we begin with an index
+ from the second operand, we can swap the operands. */
+ poly_int64 nelt = d.perm.length ();
+ if (known_ge (d.perm[0], nelt))
+ {
+ d.perm.rotate_inputs (1);
+ std::swap (d.op0, d.op1);
+ }
+ if (known_gt (nelt, 1))
+ {
+ if (arc64_simd_dup (&d))
+ return true;
+ else if (arc64_simd_vpack (&d))
+ return true;
+ else if (arc64_simd_swapl (&d))
+ return true;
+ else if (arc64_simd_swap (&d))
+ return true;
+ else if (arc64_simd_vpack2wl (&d))
+ return true;
+ else if (arc64_simd_vpack2wm (&d))
+ return true;
+ else if (arc64_simd_exch (&d))
+ return true;
+ else if (arc64_simd_unpk (&d))
+ return true;
+ else if (arc64_simd_pack (&d))
+ return true;
+ else if (arc64_simd_bfly (&d))
+ return true;
+ else if (arc64_simd_lane_pack (&d))
+ return true;
+ }
+ return false;
+}
+
+/* Provide the costs of an addressing mode that contains ADDR.
+ SPEED is true when we optimize for speed rather than size. */
+
+static int
+arc64_address_cost (rtx addr, machine_mode mode,
+ addr_space_t as ATTRIBUTE_UNUSED,
+ bool speed)
+{
+ const int cost_limm = speed ? 0 : COSTS_N_INSNS (1);
+
+ if (CONSTANT_P (addr))
+ return cost_limm;
+
+ /* The cheapest constructs are the addresses which fit a store
+ instruction (or an fp load/store instruction). */
+ if (arc64_legitimate_address_1_p (mode, addr, true, false, true))
+ switch (GET_CODE (addr))
+ {
+ case PRE_DEC:
+ case PRE_INC:
+ case POST_DEC:
+ case POST_INC:
+ case PRE_MODIFY:
+ case POST_MODIFY:
+ return 0;
+
+ default:
+ return 1;
+ }
+
+ /* Anything else has a limm. */
+ return cost_limm + 2;
+}
+
+/* Compute the rtx cost. */
+
+static bool
+arc64_rtx_costs (rtx x, machine_mode mode, rtx_code outer,
+ int opno ATTRIBUTE_UNUSED, int *cost, bool speed)
+{
+ rtx op0, op1;
+ const int cost_limm = speed ? 0 : COSTS_N_INSNS (1);
+ int factor;
+
+ /* If we use a mode larger than UNITS_PER_WORD, factor it in. N.B. The cost
+ is already factored in; however, the costs for MULT and DIV are too large. */
+ factor = CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+
+ switch (GET_CODE (x))
+ {
+ case SET:
+ op0 = SET_DEST (x);
+ op1 = SET_SRC (x);
+
+ switch (GET_CODE (op0))
+ {
+ case MEM:
+ /* Store instruction. */
+
+ if ((factor == 2) && DOUBLE_LOAD_STORE)
+ *cost = COSTS_N_INSNS (1);
+ *cost += arc64_address_cost (XEXP (op0, 0), mode, 0, speed);
+ if (CONST_INT_P (op1))
+ {
+ *cost += speed ? 0 :
+ satisfies_constraint_S06S0 (op1) ? 0 : cost_limm;
+ return true;
+ }
+
+ *cost += rtx_cost (op1, mode, SET, 1, speed);
+ return true;
+
+ case SUBREG:
+ if (!REG_P (SUBREG_REG (op0)))
+ *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
+
+ /* Fall through. */
+ case REG:
+ /* Cost is just the cost of the RHS of the set. */
+ *cost += rtx_cost (op1, mode, SET, 1, speed);
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+
+ case MEM:
+ /* Generic/loads. */
+
+ if ((factor == 2) && DOUBLE_LOAD_STORE)
+ *cost = COSTS_N_INSNS (1);
+ *cost += arc64_address_cost (XEXP (x, 0), mode, 0, speed);
+ return true;
+
+ case MINUS:
+ case PLUS:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if ((mode != SImode) && (mode != DImode))
+ *cost += 1;
+
+ /* Check if we have add{1,2,3} instruction. */
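+ /* E.g. (illustrative) rA + (rB << 2), equivalently rA + rB * 4, can be
+ emitted as a single scaled-add (add2) instruction. */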
+ if ((GET_CODE (op0) == ASHIFT
+ && _1_2_3_operand (XEXP (op0, 1), VOIDmode))
+ || (GET_CODE (op0) == MULT
+ && _2_4_8_operand (XEXP (op0, 1), VOIDmode)))
+ {
+ /* Check if the 2nd instruction operand is a constant int. This
+ always goes as a limm. */
+ if (CONST_INT_P (op1))
+ *cost += cost_limm;
+ }
+ return true;
+ break;
+
+ case COMPARE:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* Virtually any instruction can do a compare with zero. */
+ if (op1 == const0_rtx)
+ *cost = 0;
+ return true;
+
+ case ZERO_EXTEND:
+ op0 = XEXP (x, 0);
+
+ /* Zero extending from an SI operation is cheap. */
+ if (MEM_P (op0))
+ {
+ /* All loads can zero extend to any size for free. */
+ *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
+ return true;
+ }
+ if (mode == DImode
+ && GET_MODE (op0) == SImode
+ && outer == SET)
+ {
+ int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
+ if (op_cost)
+ *cost = op_cost;
+ return true;
+ }
+ break;
+
+ case SIGN_EXTEND:
+ op0 = XEXP (x, 0);
+ if (MEM_P (op0))
+ {
+ /* All loads can sign extend to any size for free. */
+ *cost = rtx_cost (op0, VOIDmode, SIGN_EXTEND, 0, speed);
+ return true;
+ }
+ *cost += COSTS_N_INSNS (2);
+ break;
+
+ case CONST_INT:
+ {
+ HOST_WIDE_INT imm = INTVAL (x);
+
+ /* In general any 32bit constant can be loaded immediately,
+ however, when we compile for speed, we try to avoid
+ them. */
+ *cost = 0;
+ if (UNSIGNED_INT6 (imm))
+ return true;
+ else
+ switch (outer)
+ {
+ case SET:
+ if (SIGNED_INT12 (imm))
+ return true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ /* FALLTHRU */
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *cost = cost_limm;
+ return true;
+
+ case LSHIFTRT:
+ op0 = XEXP (x, 0);
+ if (REG_P (op0))
+ return true;
+ break;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ return true;
+
+ case MULT:
+ op0 = XEXP (x, 0);
+ /* Multiplication has a large latency; prefer adds and shifts. */
+ *cost = COSTS_N_INSNS (2);
+ /* 64x64 multiplication is expensive. */
+ if (GET_MODE_SIZE (mode) != UNITS_PER_WORD
+ && (GET_CODE (op0) != ZERO_EXTEND
+ && GET_CODE (op0) != SIGN_EXTEND))
+ *cost = COSTS_N_INSNS (3);
+ else if (GET_MODE_SIZE (mode) == UNITS_PER_WORD * 2)
+ *cost = factor * COSTS_N_INSNS (4);
+
+ return true;
+
+ case MOD:
+ case UMOD:
+ case DIV:
+ case UDIV:
+ /* Favor synthetic divisions. */
+ *cost = factor * COSTS_N_INSNS (12);
+ return true;
+
+ case EQ:
+ case NE:
+ if (outer == IF_THEN_ELSE
+ && (GET_CODE (XEXP (x, 0)) == AND
+ || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT)
+ && XEXP (x, 1) == const0_rtx)
+ {
+ *cost = 0;
+ return true;
+ }
+ break;
+
+ case AND:
+ case XOR:
+ case IOR:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if ((REG_P (op0) || REG_P (op1))
+ && (CONST_INT_P (op0) || CONST_INT_P (op1)))
+ return true;
+
+ /* Detect VPACK2HL instructions. */
+ if (TARGET_SIMD
+ && GET_CODE (op0) == AND
+ && GET_CODE (op1) == ASHIFT
+ && mode == E_SImode)
+ return true;
+
+ break;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Wrapper around arc64_rtx_costs, dumps the partial, or total cost
+ calculated for X. This cost is stored in *COST. Returns true
+ if the total cost of X was calculated. */
+static bool
+arc64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
+ int param, int *cost, bool speed)
+{
+ bool result = arc64_rtx_costs (x, mode, (rtx_code) outer, param, cost, speed);
+
+ if (dump_file)
+ {
+ print_rtl_single (dump_file, x);
+ fprintf (dump_file, "\nARC: %s cost: %d (%s)\n",
+ speed ? "Speed" : "Size",
+ *cost, result ? "final" : "partial");
+ }
+
+ return result;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
+ instruction fusion of some sort. */
+
+static bool
+arc64_macro_fusion_p (void)
+{
+ /* When we use accumulators, make sure we schedule the producer/consumer of
+ the accumulator close to each other. */
+ return TARGET_SIMD;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
+ should be kept together during scheduling. */
+
+static bool
+arc64_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+ rtx prev_set = single_set (prev);
+ rtx curr_set = single_set (curr);
+ /* prev and curr are simple SET insns i.e. no flag setting or branching. */
+ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+ if (!arc64_macro_fusion_p ())
+ return false;
+
+ /* Don't handle anything with a jump. FIXME! maybe it is interesting to keep
+ the cmp and jcc together for later folding into a BRcc insn. */
+ if (!simple_sets_p)
+ return false;
+
+ /* First, try to match any MPY instruction which can have an implicit
+ accumulator write, followed by any MAC instruction. */
+ if (get_attr_type (prev) == TYPE_MPY
+ && get_attr_type (curr) == TYPE_MAC)
+ return true;
+
+ /* Second, try to match any back-to-back MAC instructions. */
+ if (get_attr_type (prev) == TYPE_MAC
+ && (get_attr_type (curr) == TYPE_MAC))
+ return true;
+ if (get_attr_type (prev) == TYPE_VMAC2H
+ && (get_attr_type (curr) == TYPE_VMAC2H))
+ return true;
+
+ /* Third, keep the MAC and the following MOV(L) rx,r58 close to each other.
+ This pattern will be matched in machine reorg and simplified to a single
+ MAC instruction. */
+ if (get_attr_type (curr) == TYPE_MOVE
+ && REG_P (SET_SRC (curr_set))
+ && REGNO (SET_SRC (curr_set)) == R58_REGNUM
+ && get_attr_type (prev) == TYPE_MAC)
+ return true;
+
+#if 0
+ /* Try to keep r58 setting close to any previous related instruction. We may
+ be able to merge those two into one instruction. */
+ rtx set_dest;
+ set_dest = SET_DEST (curr_set);
+ if (get_attr_type (curr) == TYPE_MOVE
+ && REG_P (set_dest)
+ && REGNO (set_dest) == R58_REGNUM
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_SRC (curr_set))
+ && REGNO (SET_DEST (prev_set)) == REGNO (SET_SRC (curr_set)))
+ return true;
+
+ /* Try to keep any mac and any previous instruction close, dependency on add
+ operand. */
+ if (get_attr_type (curr) == TYPE_MAC
+ && REG_P (SET_DEST (prev_set))
+ && GET_CODE (SET_SRC (curr_set)) == PLUS
+ && REG_P (XEXP (SET_SRC (curr_set), 1))
+ && REGNO (SET_DEST (prev_set)) != R58_REGNUM
+ && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1)))
+ return true;
+#endif
+ return false;
+}
+
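+/* Parse the -mcpu= string and adjust the target flags accordingly. For
+ example (illustrative, derived from the checks below): a string starting
+ with "hs6" selects the 64-bit ISA, and an '8' in the fourth position
+ (say, a hypothetical "hs68") additionally enables wide loads/stores
+ (or -mll64 on 32-bit targets) and SIMD. */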
+static void
+arc64_override_options (void)
+{
+ if (arcv3_cpu_string)
+ {
+ const char *p = arcv3_cpu_string;
+ if (strncmp (p, "hs5", 3) == 0)
+ TARGET_64BIT = false;
+ else if (strncmp (p, "hs6", 3) == 0)
+ TARGET_64BIT = true;
+ else
+ error ("%<-mcpu=%s%>s is not a valid CPU option.", arcv3_cpu_string);
+ p += 3;
+ if (*p == '8')
+ {
+ if (TARGET_64BIT)
+ {
+ target_flags |= MASK_WIDE_LDST;
+ }
+ else
+ {
+ target_flags |= MASK_LL64;
+ }
+ target_flags |= MASK_SIMD;
+ }
+ }
+
+ if (TARGET_LL64 && TARGET_64BIT)
+ {
+ target_flags &= ~MASK_LL64;
+ warning (0, "Option -mll64 is ignored because the target"
+ " is not 32-bit.");
+ }
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+arc64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = CC_REGNUM;
+ *p2 = INVALID_REGNUM;
+ return true;
+}
+
+/* Return true if FUNC is a naked function. */
+static bool
+arc64_naked_function_p (tree func)
+{
+ tree func_decl = func;
+ if (func == NULL_TREE)
+ func_decl = current_function_decl;
+ return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
+}
+
+/* Implement 'TARGET_SET_CURRENT_FUNCTION'. */
+
+static void
+arc64_set_current_function (tree decl)
+{
+ unsigned int fn_type = ARC64_FUNCTION_UNKNOWN;
+ tree func_decl = decl;
+
+ if (decl == NULL_TREE
+ || current_function_decl == NULL_TREE
+ || current_function_decl == error_mark_node
+ || ! cfun->machine
+ || cfun->machine->fn_type != ARC64_FUNCTION_UNKNOWN)
+ return;
+
+ /* Check if it is a naked function. */
+ if (arc64_naked_function_p (decl))
+ fn_type |= ARC64_FUNCTION_NAKED;
+
+ if (func_decl == NULL_TREE)
+ func_decl = current_function_decl;
+
+ /* Now see if this is an interrupt handler. */
+ if (lookup_attribute ("interrupt",
+ TYPE_ATTRIBUTES (TREE_TYPE (func_decl))) != NULL_TREE)
+ fn_type |= ARC64_FUNCTION_ILINK;
+
+ if (!ARC_NAKED_P (fn_type) && !ARC_INTERRUPT_P (fn_type))
+ fn_type |= ARC64_FUNCTION_NORMAL;
+
+ cfun->machine->fn_type = fn_type;
+
+ if (ARC_NAKED_P (fn_type) && ARC_INTERRUPT_P (fn_type))
+ error ("function attributes %qs and %qs are mutually exclusive",
+ "interrupt", "naked");
+}
+
+/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
+static bool
+arc64_allocate_stack_slots_for_args ()
+{
+ /* Naked functions should not allocate stack slots for arguments. */
+ return !arc64_naked_function_p (current_function_decl);
+}
+
+/* Implement TARGET_WARN_FUNC_RETURN. */
+static bool
+arc64_warn_func_return (tree decl)
+{
+ /* Naked functions are implemented entirely in assembly, including the
+ return sequence, so suppress warnings about this. */
+ return !arc64_naked_function_p (decl);
+}
+
+/* Return false for selected jumps crossing between hot and cold partitions. */
+
+static bool
+arc64_can_follow_jump (const rtx_insn *br1, const rtx_insn *br2)
+{
+ /* Avoid compiler warnings. */
+ union {const rtx_insn *c; rtx_insn *r;} u;
+
+ u.c = br1;
+ if (flag_reorder_blocks_and_partition
+ && CROSSING_JUMP_P (br2))
+ switch (get_attr_type (u.r))
+ {
+ case TYPE_BRANCHCC:
+ case TYPE_BRCC:
+ return false;
+ case TYPE_BRANCH:
+ if (get_attr_length (u.r) == 2)
+ return false;
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
+/* Implements target hook TARGET_SCHED_ISSUE_RATE. */
+
+static int
+arc64_sched_issue_rate (void)
+{
+ return 2;
+}
+
+/*
+ Global functions.
+*/
+
+/* Return TRUE if SYM (the callee) should be treated as a long call
+ (i.e., called via a register). */
+
+bool
+arc64_is_long_call_p (rtx sym)
+{
+ arc64_symb symb_t = arc64_get_symbol_type (sym);
+
+ /* No subtleties for the time being, if user asks for large memory model,
+ everything goes via regs. */
+ if (!TARGET_64BIT
+ && (arc64_cmodel_var == ARC64_CMODEL_LARGE))
+ return true;
+
+ switch (symb_t)
+ {
+ case ARC64_UNK:
+ case ARC64_LO32:
+ return false;
+
+ case ARC64_PCREL:
+ case ARC64_PIC:
+ return false;
+
+ case ARC64_LPIC:
+ /* fPIC + Large memory model forces everything in registers. */
+ return (arc64_cmodel_var == ARC64_CMODEL_LARGE) ? true : false;
+
+ case ARC64_LARGE:
+ return true;
+
+ case ARC64_TLS:
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* X and Y are two things to compare using CODE. Emit the compare insn and
+ return the rtx for the cc reg in the proper mode. */
+
+rtx
+arc64_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
+{
+ machine_mode mode = SELECT_CC_MODE (code, x, y);
+ rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+
+ if (CONSTANT_P (x) && CONSTANT_P (y))
+ x = force_reg (word_mode, x);
+
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+ return cc_reg;
+}
+
+/* Prepare operands for move in MODE. Return true iff the move has
+ been emitted. */
+
+bool
+arc64_prepare_move_operands (rtx op0, rtx op1, machine_mode mode)
+{
+ if (MEM_P (op0) && !REG_P (op1))
+ {
+ if (mode == E_DImode
+ || !satisfies_constraint_S06S0 (op1))
+ op1 = force_reg (mode, op1);
+ }
+ else if (GET_MODE_SIZE (mode) == UNITS_PER_WORD
+ && CONSTANT_P (op1))
+ {
+ unsigned HOST_WIDE_INT lo;
+ unsigned HOST_WIDE_INT hi;
+ rtx tmp;
+
+ switch (GET_CODE (op1))
+ {
+ case CONST_INT:
+ gcc_assert (mode == Pmode);
+ if (!SIGNED_INT32 (INTVAL (op1)) && !UNSIGNED_INT32 (INTVAL (op1)))
+ {
+ HOST_WIDE_INT val;
+ /* We have a large 64bit immediate:
+ movhl rA, (val64 >> 32)
+ orl rA,rA, (val64 & 0xffffffff)
+ FIXME! add strategies to minimize the size. */
+
+ val = INTVAL (op1);
+ lo = zext_hwi (val, 32);
+ hi = zext_hwi (val >> 32, 32);
+ tmp = op0;
+
+ if (can_create_pseudo_p ())
+ tmp = gen_reg_rtx (mode);
+
+ /* Maybe first do a constant move via movsi to get the
+ constants minimized. */
+ emit_insn (gen_rtx_SET (tmp,
+ gen_rtx_ASHIFT (mode, GEN_INT (hi),
+ GEN_INT (32))));
+ emit_insn (gen_rtx_SET (op0,
+ plus_constant (mode, tmp, lo)));
+ return true;
+ }
+ break;
+
+ case CONST_WIDE_INT:
+ gcc_unreachable ();
+
+ case CONST_DOUBLE:
+ if (mode == SFmode)
+ return false;
+ else
+ {
+ long res[2];
+ unsigned HOST_WIDE_INT ival;
+ scalar_int_mode imode = int_mode_for_mode (mode).require ();
+
+ gcc_assert (mode == DFmode);
+
+ real_to_target (res, CONST_DOUBLE_REAL_VALUE (op1),
+ REAL_MODE_FORMAT (mode));
+ lo = zext_hwi (res[0], 32);
+ hi = zext_hwi (res[1], 32);
+
+ ival = lo | (hi << 32);
+ tmp = gen_reg_rtx (imode);
+ emit_move_insn (tmp, gen_int_mode (ival, imode));
+ emit_move_insn (op0, gen_lowpart (mode, tmp));
+ return true;
+ }
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ op1 = arc64_legitimize_address_1 (op1, op0);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Check and fix unsupported store addresses. */
+ if (MEM_P (op0)
+ && !arc64_legitimate_address_1_p (mode, XEXP (op0, 0), false,
+ false, true))
+ {
+ rtx tmp = gen_reg_rtx (Pmode);
+ rtx addr = XEXP (op0, 0);
+ rtx t0 = XEXP (addr, 0);
+ rtx t1 = XEXP (addr, 1);
+
+ if (GET_CODE (t0) == MULT)
+ {
+ rtx ta = XEXP (t0, 0);
+ rtx tb = XEXP (t0, 1);
+ t0 = gen_rtx_ASHIFT (Pmode, ta,
+ GEN_INT (ARC64LOG2 (INTVAL (tb))));
+ }
+
+ emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (Pmode, t0, t1)));
+ op0 = replace_equiv_address (op0, tmp);
+ }
+ emit_insn (gen_rtx_SET (op0, op1));
+ return true;
+}
+
+/* Split a move with a long immediate into smaller, size-friendly
+ instructions. */
+#if 0
+bool
+arc64_split_mov_const (rtx *operands)
+{
+ unsigned HOST_WIDE_INT ival;
+ HOST_WIDE_INT shimm;
+ machine_mode mode = GET_MODE (operands[0]);
+
+ /* Manage a constant. */
+ gcc_assert (CONST_INT_P (operands[1]));
+ ival = INTVAL (operands[1]) & 0xffffffff;
+
+ if (SIGNED_INT12 (ival))
+ return false;
+
+ /* 1. Check if we can just rotate limm by 8 but using ROR8. */
+ if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f000000) == 0))
+ {
+ shimm = (ival >> 24) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ROTATERT (mode, GEN_INT (shimm),
+ GEN_INT (8))));
+ return true;
+ }
+ /* 2. Check if we can just shift by 8 to fit into the u6 of LSL8. */
+ if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f00) == 0))
+ {
+ shimm = (ival >> 8) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ASHIFT (mode, GEN_INT (shimm),
+ GEN_INT (8))));
+ return true;
+ }
+
+ /* 3. Check if we can just shift by 16 to fit into the u6 of LSL16. */
+ if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f0000) == 0))
+ {
+ shimm = (ival >> 16) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ASHIFT (mode, GEN_INT (shimm),
+ GEN_INT (16))));
+ return true;
+ }
+
+ /* 4. Check if we can do something like mov_s h,u8 / asl_s ra,h,#nb. */
+ if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0
+ && TARGET_BARREL_SHIFTER)
+ {
+ HOST_WIDE_INT shift = __builtin_ffs (ival);
+ shimm = (ival >> (shift - 1)) & 0xff;
+ emit_insn (gen_rtx_SET (operands[0], GEN_INT (shimm)));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ASHIFT (mode, operands[0],
+ GEN_INT (shift - 1))));
+ return true;
+ }
+
+ /* 5. Check if we can just rotate the limm, useful when no barrel
+ shifter is present. */
+ if ((ival & ~0x8000001f) == 0)
+ {
+ shimm = (ival * 2 + 1) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ROTATERT (mode, GEN_INT (shimm),
+ const1_rtx)));
+ return true;
+ }
+
+ /* 6. Check if we can do something with bmask. */
+ if (IS_POWEROF2_P (ival + 1))
+ {
+ emit_insn (gen_rtx_SET (operands[0], constm1_rtx));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_AND (mode, operands[0],
+ GEN_INT (ival))));
+ return true;
+ }
+
+ return false;
+}
+
+/* Helper to check Cax constraint. */
+
+bool
+arc64_check_mov_const (HOST_WIDE_INT ival)
+{
+ ival = ival & 0xffffffff;
+
+ if ((ival & ~0x8000001f) == 0)
+ return true;
+
+ if (IS_POWEROF2_P (ival + 1))
+ return true;
+
+ /* The next rules requires a barrel shifter. */
+ if (!TARGET_BARREL_SHIFTER)
+ return false;
+
+ if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0)
+ return true;
+
+ if ((ival & ~0x3f00) == 0)
+ return true;
+
+ if ((ival & ~0x3f0000) == 0)
+ return true;
+
+ if ((ival & ~0x3f000000) == 0)
+ return true;
+
+ return false;
+}
+#endif
+
+/* This function is used by the call expanders of the machine description.
+ RESULT is the register in which the result is returned. It's NULL for
+ "call" and "sibcall".
+ MEM is the location of the function call.
+ SIBCALL indicates whether this function call is normal call or sibling call.
+ It will generate different pattern accordingly. */
+
+void
+arc64_expand_call (rtx result, rtx mem, bool sibcall)
+{
+ rtx call, callee, tmp;
+ rtvec vec;
+ machine_mode mode;
+
+ gcc_assert (MEM_P (mem));
+ callee = XEXP (mem, 0);
+ mode = GET_MODE (callee);
+ gcc_assert (mode == Pmode || CONST_INT_P (callee));
+
+ /* Decide if we should generate indirect calls by loading the
+ address of the callee into a register before performing the
+ branch-and-link. */
+ if (arc64_is_long_call_p (callee) && !REG_P (callee))
+ XEXP (mem, 0) = force_reg (mode, callee);
+
+ call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
+
+ if (result != NULL_RTX)
+ call = gen_rtx_SET (result, call);
+
+ if (sibcall)
+ tmp = ret_rtx;
+ else
+ tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, BLINK_REGNUM));
+
+ vec = gen_rtvec (2, call, tmp);
+ call = gen_rtx_PARALLEL (VOIDmode, vec);
+
+ emit_call_insn (call);
+}
+
+/* Return nonzero if this function is known to have a null epilogue.
+ This allows the optimizer to omit jumps to jumps if no stack
+ was created. */
+
+bool
+arc64_can_use_return_insn_p (void)
+{
+ return (reload_completed && cfun->machine->frame.frame_size == 0
+ && !ARC_INTERRUPT_P (cfun->machine->fn_type));
+}
+
+
+/* Return 1 if the register is used by the epilogue. We need to say the
+ return register is used, but only after epilogue generation is complete.
+ Note that in the case of sibcalls, the values "used by the epilogue" are
+ considered live at the start of the called function. */
+
+int
+arc64_epilogue_uses (int regno)
+{
+#ifdef HAVE_AS_TLS
+ if (regno == R30_REGNUM)
+ return 1;
+#endif
+
+ if (epilogue_completed)
+ {
+ if (regno == BLINK_REGNUM)
+ return 1;
+
+ /* An interrupt restores more registers. */
+ if (ARC_INTERRUPT_P (cfun->machine->fn_type)
+ && (df_regs_ever_live_p (regno)
+ || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno))))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return 1 if we use TP because it is alive on entry to an exception
+ edge. */
+
+int
+arc64_eh_uses (int regno ATTRIBUTE_UNUSED)
+{
+#ifdef HAVE_AS_TLS
+ if (regno == R30_REGNUM)
+ return 1;
+#endif
+ return 0;
+}
+
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
+ or argument pointer. TO is either the stack pointer or hard frame
+ pointer. */
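+
+/* A sketch of the frame layout implied by the offsets below (stack
+   grows downwards, higher addresses first):
+
+     incoming arguments
+   AP -->
+     callee-saved registers      saved_regs_size
+   HARD_FP (== soft FP) -->
+     local variables             saved_locals_size
+     outgoing arguments          saved_outargs_size
+   SP -->  */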
+
+HOST_WIDE_INT
+arc64_initial_elimination_offset (unsigned from, unsigned to)
+{
+ struct arc64_frame *frame = &cfun->machine->frame;
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return frame->saved_regs_size;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return (frame->saved_regs_size + frame->saved_locals_size
+ + frame->saved_outargs_size);
+
+ if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
+ return (frame->saved_locals_size + frame->saved_outargs_size);
+
+ if ((from == FRAME_POINTER_REGNUM) && (to == HARD_FRAME_POINTER_REGNUM))
+ return 0;
+
+ gcc_unreachable ();
+}
+
+/* Helper for INIT_EXPANDERS macro called to initialize any target
+ specific information. */
+
+void
+arc64_init_expanders (void)
+{
+ init_machine_status = arc64_init_machine_status;
+}
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a
+ COMPARE, return the mode to be used for the comparison. */
+
+machine_mode
+arc64_select_cc_mode (enum rtx_code op,
+ rtx x,
+ rtx y)
+{
+ machine_mode mode = GET_MODE (x);
+
+ /* Matches all instructions which can do .f and clobber only the Z flag. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && y == const0_rtx
+ && GET_CODE (x) == MULT
+ && (op == EQ || op == NE))
+ return CC_Zmode;
+
+ /* Matches all instructions which can do .f and clobber the Z and N
+ flags. Because we compare with zero, for LT we can use "mi" and
+ for GE we can use "pl". We cannot use GT with "pnz" because it
+ cannot be reversed. */
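+ /* For example, a signed "x < 0" test can be carried out by the
+ flag-setting (.f) form of the instruction that defines x, followed
+ by a branch or conditional instruction using the "mi" condition. */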
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && y == const0_rtx
+ && (op == EQ || op == NE || op == LT || op == GE))
+ return CC_ZNmode;
+
+ /* All floating point compares return CC_FPU if it is an equality
+ comparison, and CC_FPUE otherwise. N.B. LTGT and UNEQ cannot be
+ directly mapped to fcmp instructions. */
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ switch (op)
+ {
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ return CC_FPUmode;
+
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ case LTGT:
+ return CC_FPUEmode;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return CCmode;
+}
+
+/* Implement RETURN_ADDR_RTX. We do not support moving back to a
+ previous frame. */
+
+rtx
+arc64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return const0_rtx;
+ return get_hard_reg_initial_val (Pmode, BLINK_REGNUM);
+}
+
+/* Expand the "prologue" pattern. */
+
+void
+arc64_expand_prologue (void)
+{
+ HOST_WIDE_INT frame_allocated;
+ struct arc64_frame *frame = &cfun->machine->frame;
+
+ if (flag_stack_usage_info)
+ current_function_static_stack_size = frame->frame_size;
+
+ if (ARC_NAKED_P (cfun->machine->fn_type))
+ return;
+
+ frame_allocated = frame->frame_size;
+
+ frame_allocated -= arc64_save_callee_saves ();
+
+ /* If anything is left, allocate it. */
+ if (frame_allocated > 0)
+ frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated);
+
+ /* Emit a blockage. */
+ emit_insn (gen_blockage ());
+}
+
+/* Expand "epilogue" pattern. */
+
+void
+arc64_expand_epilogue (bool sibcall_p)
+{
+ HOST_WIDE_INT frame_deallocated;
+ struct arc64_frame *frame = &cfun->machine->frame;
+
+ if (ARC_NAKED_P (cfun->machine->fn_type))
+ {
+ emit_jump_insn (gen_return ());
+ return;
+ }
+
+ frame_deallocated = frame->frame_size;
+ frame_deallocated -= arc64_restore_callee_saves (sibcall_p);
+
+ if (frame_deallocated != 0)
+ frame_stack_add (frame_deallocated);
+
+ /* For frames that use __builtin_eh_return, the register defined by
+ EH_RETURN_STACKADJ_RTX is set to 0 for all standard return paths.
+ On eh_return paths however, the register is set to the value that
+ should be added to the stack pointer in order to restore the
+ correct stack pointer for the exception handling frame.
+
+ For ARC64 we use r4 for EH_RETURN_STACKADJ_RTX; add it to the
+ stack pointer on eh_return paths. */
+ if (crtl->calls_eh_return)
+ emit_insn (gen_add2_insn (stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+ if (ARC_INTERRUPT_P (cfun->machine->fn_type))
+ emit_jump_insn (gen_rtie ());
+ else if (!sibcall_p)
+ emit_jump_insn (gen_simple_return ());
+}
+
+/* Helper used to determine if an address requires a long immediate.
+ To be used when computing the length of a load/store
+ instruction. */
+
+bool
+arc64_limm_addr_p (rtx op)
+{
+ if (!MEM_P (op))
+ return false;
+
+ /* Only look at the address once we know OP really is a MEM. */
+ rtx addr = XEXP (op, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST_INT:
+ case CONST:
+ case UNSPEC:
+ case LO_SUM:
+ return true;
+
+ case PRE_INC:
+ case PRE_DEC:
+ case POST_INC:
+ case POST_DEC:
+ case PRE_MODIFY:
+ case POST_MODIFY:
+ case PLUS:
+ /* The legitimate-address check doesn't recognize the [b,limm]
+ variant of st. Hence, use it to determine whether the address
+ contains a limm. */
+ return !arc64_legitimate_address_1_p (GET_MODE (op), addr,
+ false, false, true);
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Used by move_dest_operand predicate. */
+
+bool
+arc64_legitimate_store_address_p (machine_mode mode, rtx addr)
+{
+ return arc64_legitimate_address_1_p (mode, addr, true, false, true);
+}
+
+/* Return true if an address fits a short load/store instruction. */
+
+bool
+arc64_short_access_p (rtx op, machine_mode mode, bool load_p)
+{
+ rtx addr, plus0, plus1;
+ bool f0, f1;
+
+ /* Eliminate non-memory operations. */
+ if (GET_CODE (op) != MEM)
+ return false;
+
+ /* FIXME! remove it when "uncached" attribute is added. */
+ if (MEM_VOLATILE_P (op) && TARGET_VOLATILE_DI)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ return check_short_insn_register_p (addr, false);
+
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+
+ f0 = check_short_insn_register_p (plus0, false);
+ f1 = check_short_insn_constant_p (plus1, mode);
+
+ /* Check for [Rb + shimm]. */
+ if (f0 && f1)
+ return true;
+
+ if (!load_p)
+ return false;
+
+ /* Check for [Rb + Ri]. */
+ f1 = check_short_insn_register_p (plus1, false);
+
+ if (f0 && f1)
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Return true if an address fits a floating point load/store
+ instruction. The following formats are allowed: [b, s9], [b],
+ [s32limm], and scaled [b, s9]. */
+
+bool
+arc64_fp_access_p (rtx op, machine_mode mode)
+{
+ rtx addr;
+
+ /* Eliminate non-memory operations. */
+ if (GET_CODE (op) != MEM)
+ return false;
+
+ /* FIXME! remove it when "uncached" attribute is added. */
+ if (MEM_VOLATILE_P (op) && TARGET_VOLATILE_DI)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+
+ return arc64_legitimate_address_1_p (mode, addr, true, false, false);
+}
+
+/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
+ normally or return to a previous frame after unwinding.
+
+ An EH return uses a single shared return sequence. The epilogue is
+ exactly like a normal epilogue except that it has an extra input
+ register (EH_RETURN_STACKADJ_RTX) which contains the stack
+ adjustment that must be applied after the frame has been destroyed.
+ An extra label is inserted before the epilogue which initializes
+ this register to zero, and this is the entry point for a normal
+ return.
+
+ An actual EH return updates the return address, initializes the
+ stack adjustment and jumps directly into the epilogue (bypassing
+ the zeroing of the adjustment). Since the return address is
+ typically saved on the stack when a function makes a call, the
+ saved BLINK must be updated outside the epilogue.
+
+ This poses problems as the store is generated well before the
+ epilogue, so the offset of BLINK is not known yet. Also
+ optimizations will remove the store as it appears dead, even after
+ the epilogue is generated (as the base or offset for loading BLINK
+ is different in many cases).
+
+ To avoid these problems this implementation forces the frame
+ pointer in eh_return functions so that the location of BLINK is
+ fixed and known early. It also marks the store volatile, so no
+ optimization is permitted to remove the store. */
+
+rtx
+arc64_eh_return_handler_rtx (void)
+{
+ rtx tmp = gen_frame_mem (Pmode,
+ plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+
+ /* Mark the store volatile, so no optimization is permitted to remove it. */
+ MEM_VOLATILE_P (tmp) = true;
+ return tmp;
+}
+
+/* Select a format to encode pointers in exception handling data. */
+
+int
+arc64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
+{
+ int type;
+
+ if (!flag_pic)
+ return DW_EH_PE_absptr;
+
+ switch (arc64_cmodel_var)
+ {
+ case ARC64_CMODEL_SMALL:
+ case ARC64_CMODEL_MEDIUM:
+ /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
+ for everything. */
+ type = DW_EH_PE_sdata4;
+ break;
+ default:
+ /* No assumptions here. 8-byte relocs required. */
+ type = DW_EH_PE_sdata8;
+ break;
+ }
+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+}
+
+/* Emit a (pre) memory barrier around an atomic sequence according to
+ MODEL. */
+
+void
+arc64_pre_atomic_barrier (enum memmodel model)
+{
+ if (need_atomic_barrier_p (model, true))
+ emit_insn (gen_memory_barrier ());
+}
+
+/* Emit a (post) memory barrier around an atomic sequence according to
+ MODEL. */
+
+void
+arc64_post_atomic_barrier (enum memmodel model)
+{
+ if (need_atomic_barrier_p (model, false))
+ emit_insn (gen_memory_barrier ());
+}
+
+/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
+ to perform. MEM is the memory on which to operate. VAL is the second
+ operand of the binary operator. BEFORE and AFTER are optional locations to
+ return the value of MEM either before or after the operation. MODEL_RTX
+ is a CONST_INT containing the memory model to use. */
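+
+/* The emitted sequence is roughly (a sketch; assuming the exclusive
+   load/store patterns map to the LLOCK/SCOND family of instructions):
+
+     1: llock  before, [mem]
+        <op>   after, before, val
+        scond  after, [mem]
+        bnz    1b
+
+   wrapped in the pre/post memory barriers required by MODEL. */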
+
+void
+arc64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+ rtx orig_before, rtx orig_after, rtx model_rtx)
+{
+ enum memmodel model = (enum memmodel) INTVAL (model_rtx);
+ machine_mode mode = GET_MODE (mem);
+ rtx label, x, cond;
+ rtx before = orig_before, after = orig_after;
+
+ /* ARC atomic ops work only with 32-bit or 64-bit aligned memories. */
+ gcc_assert (mode == SImode || mode == DImode);
+
+ arc64_pre_atomic_barrier (model);
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ label = gen_rtx_LABEL_REF (VOIDmode, label);
+
+ if (before == NULL_RTX)
+ before = gen_reg_rtx (mode);
+
+ if (after == NULL_RTX)
+ after = gen_reg_rtx (mode);
+
+ /* Load exclusive. */
+ if (mode == SImode)
+ emit_insn (gen_arc_load_exclusivesi (before, mem));
+ else /* DImode */
+ emit_insn (gen_arc_load_exclusivedi (before, mem));
+
+ switch (code)
+ {
+ case NOT:
+ x = gen_rtx_AND (mode, before, val);
+ emit_insn (gen_rtx_SET (after, x));
+ x = gen_rtx_NOT (mode, after);
+ emit_insn (gen_rtx_SET (after, x));
+ break;
+
+ case MINUS:
+ if (CONST_INT_P (val))
+ {
+ val = GEN_INT (-INTVAL (val));
+ code = PLUS;
+ }
+
+ /* FALLTHRU. */
+ default:
+ x = gen_rtx_fmt_ee (code, mode, before, val);
+ emit_insn (gen_rtx_SET (after, x));
+ break;
+ }
+
+ /* Exclusively store new item. Store clobbers CC reg. */
+ if (mode == SImode)
+ emit_insn (gen_arc_store_exclusivesi (mem, after));
+ else /* DImode */
+ emit_insn (gen_arc_store_exclusivedi (mem, after));
+
+ /* Check the result of the store. */
+ cond = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ label, pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+ arc64_post_atomic_barrier (model);
+}
+
+/* Helper function used by "atomic_compare_and_swap" expand
+ pattern. */
+
+void
+arc64_expand_compare_and_swap (rtx operands[])
+{
+ rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
+ machine_mode mode;
+
+ bval = operands[0];
+ rval = operands[1];
+ mem = operands[2];
+ oldval = operands[3];
+ newval = operands[4];
+ is_weak = operands[5];
+ mod_s = operands[6];
+ mod_f = operands[7];
+ mode = GET_MODE (mem);
+
+ if (reg_overlap_mentioned_p (rval, oldval))
+ oldval = copy_to_reg (oldval);
+
+ if (mode == SImode || mode == DImode)
+ {
+ if (mode == SImode)
+ emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
+ is_weak, mod_s, mod_f));
+ else /* DImode */
+ emit_insn (gen_atomic_compare_and_swapdi_1 (rval, mem, oldval, newval,
+ is_weak, mod_s, mod_f));
+
+ x = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_EQ (SImode, x, const0_rtx);
+ emit_insn (gen_rtx_SET (bval, x));
+ }
+ else
+ {
+ arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
+ is_weak, mod_s, mod_f);
+ }
+}
+
+/* Helper function used by the "atomic_compare_and_swapsdi_1"
+ pattern. */
+
+void
+arc64_split_compare_and_swap (rtx operands[])
+{
+ rtx rval, mem, oldval, newval;
+ machine_mode mode, mode_cc;
+ enum memmodel mod_s, mod_f;
+ bool is_weak;
+ rtx label1, label2, x, cond;
+
+ rval = operands[0];
+ mem = operands[1];
+ oldval = operands[2];
+ newval = operands[3];
+ is_weak = (operands[4] != const0_rtx);
+ mod_s = (enum memmodel) INTVAL (operands[5]);
+ mod_f = (enum memmodel) INTVAL (operands[6]);
+ mode = GET_MODE (mem);
+
+ /* ARC atomic ops work only with 32-bit or 64-bit aligned memories. */
+ gcc_assert (mode == SImode || mode == DImode);
+
+ arc64_pre_atomic_barrier (mod_s);
+
+ label1 = NULL_RTX;
+ if (!is_weak)
+ {
+ label1 = gen_label_rtx ();
+ emit_label (label1);
+ }
+ label2 = gen_label_rtx ();
+
+ /* Load exclusive. */
+ if (mode == SImode)
+ emit_insn (gen_arc_load_exclusivesi (rval, mem));
+ else /* DImode */
+ emit_insn (gen_arc_load_exclusivedi (rval, mem));
+
+ /* Check if it is oldval. */
+ mode_cc = SELECT_CC_MODE (NE, rval, oldval);
+ cond = gen_rtx_REG (mode_cc, CC_REGNUM);
+ emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode_cc, rval, oldval)));
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+ /* Exclusively store new item. Store clobbers CC reg. */
+ if (mode == SImode)
+ emit_insn (gen_arc_store_exclusivesi (mem, newval));
+ else /* DImode */
+ emit_insn (gen_arc_store_exclusivedi (mem, newval));
+
+ if (!is_weak)
+ {
+ /* Check the result of the store. */
+ cond = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+ }
+
+ if (mod_f != MEMMODEL_RELAXED)
+ emit_label (label2);
+
+ arc64_post_atomic_barrier (mod_s);
+
+ if (mod_f == MEMMODEL_RELAXED)
+ emit_label (label2);
+}
+
+/* Expander for casesi. The vector table is always PC-relative and is
+ made up of branch instructions. When the CODE_DENSITY option is
+ enabled we use the BI instruction; otherwise, depending on the
+ memory model, an emulation of it. The same emulation construction
+ is used for PIC and for the LARGE memory model. For a non-PIC
+ SMALL/MEDIUM memory model we make use of a single add2 instruction
+ which takes the address of the start of the dispatch table as one
+ input and the index into the table as the other. */
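+
+/* A rough sketch of the non-PIC SMALL-model expansion (the exact
+   instructions come from the casesi_* patterns in arc64.md):
+
+     sub   idx, idx, <lower bound>      ; normalise the index
+     <compare-and-branch idx > range to the default label>
+     add2  tmp, @table, idx
+     <dispatch through tmp into the table of branches>
+
+   The PIC and LARGE-model variants first materialise the table
+   address with a PC-relative sequence and then do the same add and
+   dispatch in DImode. */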
+
+void
+arc64_expand_casesi (rtx operands[])
+{
+ rtx reg;
+
+ if (operands[1] != const0_rtx)
+ {
+ reg = gen_reg_rtx (SImode);
+ operands[1] = GEN_INT (trunc_int_for_mode (-INTVAL (operands[1]),
+ SImode));
+ emit_insn (gen_addsi3 (reg, operands[0], operands[1]));
+ operands[0] = reg;
+ }
+ emit_unlikely_jump (gen_cbranchsi4 (gen_rtx_GTU (SImode, operands[0],
+ operands[2]),
+ operands[0], operands[2], operands[4]));
+
+ if (!TARGET_CODE_DENSITY)
+ {
+ switch (arc64_cmodel_var)
+ {
+ case ARC64_CMODEL_SMALL:
+ if (!flag_pic)
+ {
+ reg = gen_reg_rtx (SImode);
+ emit_insn (gen_casesi_addaddr (reg, operands[0], operands[3]));
+ operands[0] = reg;
+ break;
+ }
+ /* Fall through */
+ case ARC64_CMODEL_MEDIUM:
+ case ARC64_CMODEL_LARGE:
+ {
+ gcc_assert (word_mode == DImode);
+ /* Same code is used for PIC and large memory model. */
+ rtx lbl = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ rtx tmp = gen_reg_rtx (DImode);
+ reg = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (reg,
+ gen_rtx_UNSPEC (DImode,
+ gen_rtvec (1, lbl),
+ ARC64_UNSPEC_PCREL)));
+ emit_insn (gen_casesi_addaddrdi (tmp, operands[0], reg));
+ emit_jump_insn (gen_casesi_dispatchdi (tmp, operands[3]));
+ return;
+ }
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ emit_jump_insn (gen_casesi_dispatch (operands[0], operands[3]));
+}
+
+bool
+arc64_allow_direct_access_p (rtx op)
+{
+ return (arc64_get_symbol_type (op) == ARC64_LO32);
+}
+
+/* Decide whether a SIMD mov instruction needs to be split. Return
+ TRUE if so. This procedure is required when the vector length is
+ larger than 64 bits. */
+bool
+arc64_split_double_move_p (rtx *operands, machine_mode mode)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ /* Split only double moves. */
+ if (GET_MODE_SIZE (mode) < (UNITS_PER_WORD * 2))
+ return false;
+
+ if (register_operand (op0, mode) && register_operand (op1, mode))
+ {
+ /* Check if we can use vadd2 instruction as a mov. */
+ if (TARGET_SIMD
+ && !FLOAT_MODE_P (mode)
+ && !TARGET_64BIT
+ && (GET_MODE_SIZE (mode) == (2 * UNITS_PER_WORD)))
+ {
+ /* If both registers are even-numbered, fall back to vadd2. */
+ if (((REGNO (op0) & 0x01) == 0) && ((REGNO (op1) & 0x01) == 0))
+ return false;
+ else
+ return true;
+ }
+
+ /* Check for r-reg to f-reg moves. */
+ if (GP_REGNUM_P (REGNO (op0)) || GP_REGNUM_P (REGNO (op1)))
+ return true;
+
+ /* Sanity check for vfmov instruction. */
+ gcc_assert (arc64_fsimd_register (op0, mode)
+ && arc64_fsimd_register (op1, mode));
+ return false;
+ }
+
+ /* Check if we have 64/128bit moves. */
+ if (DOUBLE_LOAD_STORE
+ && ((memory_operand (op0, mode) && REG_P (op1))
+ || (memory_operand (op1, mode) && REG_P (op0))))
+ {
+ gcc_assert (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2));
+ /* Sanity check for wide st/ld instructions. */
+ if (REG_P (op0) && ((REGNO (op0) & 0x01) != 0))
+ return true;
+ if (REG_P (op1) && ((REGNO (op1) & 0x01) != 0))
+ return true;
+ return false;
+ }
+
+ /* Everything else goes through a split. */
+ return true;
+}
+
+/* This is the actual routine which splits a SIMD move into smaller
+ pieces. */
+void
+arc64_split_double_move (rtx *operands, machine_mode mode)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx lo, hi, mem_lo, mem_hi, src, dst;
+ unsigned int rdst, rsrc, i;
+ unsigned iregs = CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+ bool swap_p = false;
+ machine_mode mvmode = smallest_int_mode_for_size (BITS_PER_WORD);
+
+ /* Maximum size handled is twice UNITS_PER_WORD. */
+ gcc_assert (iregs <= 2);
+
+ /* This procedure works as long as the width of the fp regs is the
+ same as the width of r regs. */
+ if (FLOAT_MODE_P (mode))
+ {
+ gcc_assert (UNITS_PER_WORD == UNITS_PER_FP_REG);
+ mvmode = float_mode_for_size (BITS_PER_WORD).require ();
+ }
+
+ /* Split reg-reg move. */
+ if (REG_P (op0) && REG_P (op1))
+ {
+ rdst = REGNO (op0);
+ rsrc = REGNO (op1);
+
+ if (!reg_overlap_mentioned_p (op0, op1)
+ || rdst < rsrc)
+ /* The fp regs will never overlap r-regs. However, this
+ procedure can also be used for r-reg to r-reg splits. */
+ for (i = 0; i < iregs; i++)
+ emit_move_insn (gen_rtx_REG (mvmode, rdst + i),
+ gen_rtx_REG (mvmode, rsrc + i));
+ else
+ for (i = 0; i < iregs; i++)
+ emit_move_insn (gen_rtx_REG (mvmode, rdst + iregs - i - 1),
+ gen_rtx_REG (mvmode, rsrc + iregs - i - 1));
+ return;
+ }
+
+ /* Split mem-reg moves. */
+ gcc_assert (REG_P (op0) || REG_P (op1));
+
+ if (REG_P (op1))
+ {
+ src = op1;
+ dst = op0;
+ }
+ else
+ {
+ src = op0;
+ dst = op1;
+ }
+
+ lo = gen_lowpart (mvmode, src);
+ hi = gen_highpart_mode (mvmode, mode, src);
+
+ if (auto_inc_p (XEXP (dst, 0)))
+ {
+ rtx offset, reg, next, addr = XEXP (dst, 0);
+ enum rtx_code code = GET_CODE (addr);
+
+ switch (code)
+ {
+ case PRE_INC:
+ offset = GEN_INT (GET_MODE_SIZE (mode));
+ code = PRE_MODIFY;
+ break;
+ case PRE_DEC:
+ offset = GEN_INT (-GET_MODE_SIZE (mode));
+ code = PRE_MODIFY;
+ break;
+ case POST_MODIFY:
+ case PRE_MODIFY:
+ offset = XEXP (XEXP (addr, 1), 1);
+ break;
+ case POST_INC:
+ offset = GEN_INT (GET_MODE_SIZE (mode));
+ code = POST_MODIFY;
+ break;
+ case POST_DEC:
+ offset = GEN_INT (-GET_MODE_SIZE (mode));
+ code = POST_MODIFY;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ reg = XEXP (addr, 0);
+ next = gen_rtx_fmt_ee (code, Pmode, reg,
+ gen_rtx_PLUS (Pmode, reg, offset));
+
+ switch (code)
+ {
+ case POST_MODIFY:
+ /* We need to swap lo/hi order such that we emit first the
+ hi-load with an offset, and last the post modify
+ instruction. Thus the code can handle any type of auto
+ increment address. */
+ mem_lo = adjust_automodify_address (dst, mvmode, next, 0);
+ next = plus_constant (Pmode, reg, GET_MODE_SIZE (mvmode));
+ mem_hi = adjust_automodify_address (dst, mvmode, next,
+ GET_MODE_SIZE (mvmode));
+ swap_p = true;
+ break;
+ case PRE_MODIFY:
+ mem_lo = adjust_automodify_address (dst, mvmode, next, 0);
+ next = plus_constant (Pmode, reg, GET_MODE_SIZE (mvmode));
+ mem_hi = adjust_automodify_address (dst, mvmode, next,
+ GET_MODE_SIZE (mvmode));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (GET_CODE (XEXP (dst, 0)) == UNSPEC)
+ {
+ /* For rare situations when we need to split a PIC address. */
+ rtx addr = XEXP (dst, 0);
+ switch (XINT (addr, 1))
+ {
+ case ARC64_UNSPEC_PCREL:
+ addr = XVECEXP (addr, 0, 0);
+ addr = plus_constant (Pmode, addr, GET_MODE_SIZE (mvmode));
+ addr = gen_sym_unspec (addr, ARC64_UNSPEC_PCREL);
+ break;
+
+ default:
+ /* Fail for anything else. */
+ gcc_unreachable ();
+ }
+
+ mem_lo = adjust_address (dst, mvmode, 0);
+ mem_hi = adjust_automodify_address (mem_lo, GET_MODE (mem_lo),
+ addr, GET_MODE_SIZE (mvmode));
+ }
+ else
+ {
+ mem_lo = adjust_address (dst, mvmode, 0);
+ mem_hi = arc64_move_pointer (mem_lo, GET_MODE_SIZE (mvmode));
+ /* Catching scenarios like:
+ ld r0, [r0, 4] (ld lo, [mem_lo])
+ ld r1, [r0, 8] (ld hi, [mem_hi])
+
+ And setting the trigger (swap_p) to convert them to:
+ ld r1, [r0, 8]
+ ld r0, [r0, 4] */
+ if (reg_overlap_mentioned_p (lo, mem_lo))
+ swap_p = true;
+ }
+
+ if (REG_P (op1))
+ {
+ if (!swap_p)
+ emit_move_insn (mem_lo, lo);
+ emit_move_insn (mem_hi, hi);
+ if (swap_p)
+ emit_move_insn (mem_lo, lo);
+ }
+ else
+ {
+ if (!swap_p)
+ emit_move_insn (lo, mem_lo);
+ emit_move_insn (hi, mem_hi);
+ if (swap_p)
+ emit_move_insn (lo, mem_lo);
+ }
+}
+
+/* What mode to use when copying N bits of data.
+
+   HS5x
+   n >= 64: copy_mode()
+   n >= 32: SFmode if FP_MOVE
+            SImode otherwise
+   n >= 16: HFmode if FP_MOVE
+            HImode otherwise
+   n >=  8: QImode
+
+   HS6x
+   n >= 128: copy_mode()
+   n >=  64: DFmode if FP_MOVE
+             DImode otherwise
+   n >=  32: SFmode if FP_MOVE
+             SImode otherwise
+   n >=  16: HFmode if FP_MOVE
+             HImode otherwise
+   n >=   8: QImode
+
+   Note about the "return ((machine_mode) (FP ? Fmode : Imode))":
+   GCC 8.3 warns about an "int to machine_mode" conversion if we do
+   not use the explicit "((machine_mode) ...)" cast, while it is
+   perfectly happy with a plain "return [F|I]mode;".  */
+
+static machine_mode
+cpymem_copy_mode_for_n (int n)
+{
+ /* HS6x. */
+ if (TARGET_64BIT)
+ {
+ if (n >= 128)
+ return cpymem_copy_mode ();
+ else if (n >= 64)
+ return ((machine_mode) (TARGET_FP_MOVE ? DFmode : DImode));
+ /* fall-thru. */
+ }
+ /* HS5x. */
+ else
+ {
+ if (n >= 64)
+ return cpymem_copy_mode ();
+ /* fall-thru. */
+ }
+
+ if (n >= 32)
+ return ((machine_mode) (TARGET_FP_MOVE ? SFmode : SImode));
+ else if (n >= 16)
+ return ((machine_mode) (TARGET_FP_MOVE ? HFmode : HImode));
+ else
+ return QImode;
+}
+
+/* Returns the bit size (of a mode) that is big enough to
+ handle the remaining N bits of data.
+
+ This function is not expected to be called for Ns that
+ are too big for the architecture to swallow. e.g. for
+ an HS5x target without 64-bit load/store support, any
+ N > 32 is not expected. */
+
+static int
+cpymem_smallest_bigger_mode_bitsize (int n)
+{
+ if (n <= 8)
+ return 8; /* QImode. */
+ else if (n <= 16)
+ return 16; /* H{I|F}mode. */
+ else if (n <= 32)
+ return 32; /* S{I|F}mode. */
+ else if (n <= 64)
+ {
+ /* a 64-bit arch or a 32-bit arch with double load/stores. */
+ if (TARGET_64BIT || TARGET_LL64)
+ return 64; /* {DI|DF|V2SF}mode. */
+
+ /* This function should not have been called. */
+ gcc_unreachable ();
+ }
+ else if (n <= 128)
+ {
+ if (TARGET_64BIT && TARGET_WIDE_LDST)
+ return 128; /* {TI|V2DF}mode. */
+ /* Fall-thru. */
+ }
+
+ gcc_unreachable ();
+}
+
+/* Expand cpymem, as if from a __builtin_memcpy. Return true if
+ we succeed, otherwise return false. */
+
+bool
+arc64_expand_cpymem (rtx *operands)
+{
+ int n, mode_bits;
+ rtx dst = operands[0];
+ rtx src = operands[1];
+ rtx base;
+ machine_mode cur_mode;
+ bool speed_p = !optimize_function_for_size_p (cfun);
+
+ /* When optimizing for size, give a better estimate of the length of a
+ memcpy call, but use the default otherwise. Moves larger than 8 bytes
+ will always require an even number of instructions. Each operation
+ requires both a load and a store, so divide the max number by 2. */
+ int max_num_moves = (speed_p ? 16 : ARC64_CALL_RATIO) / 2;
+ /* In case of double moves, double the threshold. */
+ if (DOUBLE_LOAD_STORE)
+ max_num_moves *= 2;
+
+ /* We can't do anything smart if the amount to copy is not constant. */
+ if (!CONST_INT_P (operands[2]))
+ return false;
+
+ n = INTVAL (operands[2]);
+
+ /* Try to keep the number of instructions low. For all cases we will do at
+ most two moves for the residual amount, since we'll always overlap the
+ remainder. */
+ const int divisor = GET_MODE_SIZE (cpymem_copy_mode ());
+ if (((n / divisor) + (n % divisor ? 2 : 0)) > max_num_moves)
+ return false;
+
+ base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+ base = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+ /* Convert n to bits to make the rest of the code simpler. */
+ n = n * BITS_PER_UNIT;
+
+ while (n > 0)
+ {
+ cur_mode = cpymem_copy_mode_for_n (n);
+
+ mode_bits = GET_MODE_BITSIZE (cur_mode);
+ arc64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
+
+ n -= mode_bits;
+
+ /* Do certain trailing copies as overlapping if it's going to be
+ cheaper, i.e. fewer instructions. For instance, for a 15-byte copy
+ it is more efficient to do two overlapping 8-byte copies than
+ 8 + 4 + 2 + 1. */
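+ /* E.g. (assuming a 64-bit target using integer moves): for a
+ 15-byte copy the first iteration copies 8 bytes, leaving n == 56
+ bits; 56 is rounded up to 64 and both pointers are moved back one
+ byte, so the second iteration copies the final 8 bytes, overlapping
+ the first copy by one byte. */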
+ if (n > 0 && n < (BITS_PER_UNIT * divisor))
+ {
+ int n_bits = cpymem_smallest_bigger_mode_bitsize (n);
+ src = arc64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
+ dst = arc64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
+ n = n_bits;
+ }
+ }
+
+ return true;
+}
+
+/* Provide a mapping from gcc register numbers to dwarf register numbers. */
+unsigned
+arc64_dbx_register_number (unsigned regno)
+{
+ if (GP_REGNUM_P (regno))
+ return regno;
+ else if (FP_REGNUM_P (regno))
+ return 128 + regno - F0_REGNUM;
+
+ /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
+ equivalent DWARF register. */
+ return DWARF_FRAME_REGISTERS;
+}
+
+#if 0
+/* Expand the fp vector shift-right pattern. Can handle SIMD vectors
+ of at most 128 bits.
+
+ +----+----+----+----+----+----+----+----+
+ | h7 | h6 | h5 | h4 | h3 | h2 | h1 | h0 |
+ | s3 | s2 | s1 | s0 |
+ | d1 | d0 |
+ +----+----+----+----+----+----+----+----+
+
+ */
+
+bool
+arc64_expand_fvect_shr (rtx *operands)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx t0;
+ machine_mode mode = GET_MODE (op0);
+ scalar_int_mode imode = int_mode_for_mode (mode).require ();
+ unsigned int ival = INTVAL (op2);
+
+ if (ARC64_VFP_128 && (ival == 64))
+ {
+ emit_move_insn (gen_lowpart (DFmode, op0), gen_highpart (DFmode, op1));
+ return true;
+ }
+ else if (ARC64_VFP_64 && (ival == 32))
+ {
+ t0 = gen_reg_rtx (SFmode);
+
+ emit_insn (gen_vec_extractv2sfsf (t0,
+ gen_lowpart (V2SFmode, op1),
+ GEN_INT (1)));
+ emit_insn (gen_vec_setv2sf (gen_lowpart (V2SFmode, op0),
+ t0, GEN_INT (0)));
+ return true;
+ }
+ else if (ARC64_VFP_32 && (ival == 16))
+ {
+ t0 = gen_reg_rtx (HFmode);
+
+ emit_insn (gen_vec_extractv2hfhf (t0, op1, GEN_INT (1)));
+ emit_insn (gen_vec_setv2hf (op0, t0, GEN_INT (0)));
+ return true;
+ }
+
+ t0 = gen_reg_rtx (imode);
+ rtx shift = expand_binop (imode, lshr_optab,
+ gen_lowpart (imode, op1), op2,
+ NULL_RTX, true, OPTAB_DIRECT);
+ emit_move_insn (t0, shift);
+ emit_move_insn (op0, gen_lowpart (mode, t0));
+ return true;
+}
+#endif
+
+/* Return TRUE if SYM requires a PLT34 reloc. The instruction is
+ valid, hence any symbol which its type is LPIC is valid for
+ instruction, see arc64_is_long_call_p. */
+
+bool
+arc64_use_plt34_p (rtx sym)
+{
+ return (arc64_get_symbol_type (sym) == ARC64_LPIC);
+}
+
+/* Determine if it's legal to put X into the constant pool. It is
+ never OK to put a symbol in a constant pool. We arrive here in the
+ case of a TLS symbol which needs to be precomputed. We force this
+ in legitimize_constant_p. */
+
+static bool
+arc64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ return contains_symbol_ref_p (x) || tls_referenced_p (x);
+}
+
+/* Generate RTL for conditional branch with rtx comparison CODE in mode
+ CC_MODE. */
+
+void
+arc64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
+ rtx label_ref)
+{
+ rtx x;
+ x = gen_rtx_fmt_ee (code, VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+ const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (VOIDmode, label_ref),
+ pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+/* True if the dependency between OUT_INSN and IN_INSN is on the accumulator
+ register. IN_INSN is a mac type of instruction. */
+
+int
+accumulator_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
+{
+ rtx in_set = single_set (in_insn);
+ rtx out_set = single_set (out_insn);
+
+ if (!in_set || !out_set)
+ return false;
+
+ if (!REG_P (SET_DEST (out_set)) || (REGNO (SET_DEST (out_set)) != R58_REGNUM))
+ return false;
+
+ rtx tmp = SET_SRC (in_set);
+ if (GET_CODE (tmp) == PLUS && GET_CODE (XEXP (tmp, 0)) == MULT)
+ return true;
+ return true;
+}
+
+/* True if IN_INSN is setting the accumulator. */
+
+int
+set_accumulator_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
+ rtx_insn *in_insn)
+{
+ rtx in_set = single_set (in_insn);
+ if (!in_set)
+ return false;
+
+ if (REG_P (SET_DEST (in_set)) && (REGNO (SET_DEST (in_set)) == R58_REGNUM))
+ return true;
+ return false;
+}
+
+/* Return 'return' instruction. */
+
+const char *
+arc64_output_return (void)
+{
+ if (ARC_NAKED_P (cfun->machine->fn_type))
+ return "";
+
+ return "j_s%*\t[blink]";
+}
+
+/* Return nonzero if register FROM_REGNO can be renamed to register
+ TO_REGNO. */
+
+bool
+arc64_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
+ unsigned to_regno)
+{
+ /* Interrupt functions can only use registers that have already been saved by
+ the prologue, even if they would normally be call-clobbered. */
+ return (!ARC_INTERRUPT_P (cfun->machine->fn_type)
+ || df_regs_ever_live_p (to_regno));
+}
+
+/* Emit the RTX necessary to initialize the vector TARGET with values in
+ VALS. */
+
+void
+arc64_expand_vector_init (rtx target, rtx vals)
+{
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int i;
+ rtx elem[4], tmp[2];
+
+ gcc_assert (n_elts <= 4);
+ for (i = 0; i < n_elts; i++)
+ {
+ elem[i] = XVECEXP (vals, 0, i);
+ if (!register_operand (elem[i], GET_MODE (elem[i])))
+ elem[i] = force_reg (inner_mode, elem[i]);
+ }
+
+ switch (mode)
+ {
+ case V4HImode:
+ tmp[0] = gen_reg_rtx (mode);
+ tmp[1] = gen_reg_rtx (mode);
+ emit_insn (gen_arc64_vpack_v4hihi (tmp[0], elem[0], elem[1]));
+ emit_insn (gen_arc64_vpack_v4hihi (tmp[1], elem[2], elem[3]));
+ emit_insn (gen_arc64_sel_lane2_0v4hi (target, tmp[0], tmp[1]));
+ break;
+
+ case V2SImode:
+ emit_insn (gen_arc64_vpack_v2sisi (target, elem[0], elem[1]));
+ break;
+
+ case V2HImode:
+ emit_insn (gen_arc64_vpack_v2hihi (target, elem[0], elem[1]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Target hooks. */
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
+ hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK arc64_output_mi_thunk
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE arc64_can_eliminate
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED arc64_frame_pointer_required
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P arc64_legitimate_address_p
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P arc64_legitimate_constant_p
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY arc64_return_in_memory
+
+/* Passing arguments. */
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE arc64_pass_by_reference
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS arc64_setup_incoming_varargs
+
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arc64_function_value
+
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P arc64_function_value_regno_p
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG arc64_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE arc64_function_arg_advance
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES arc64_arg_partial_bytes
+
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+
+#undef TARGET_COMPUTE_FRAME_LAYOUT
+#define TARGET_COMPUTE_FRAME_LAYOUT arc64_compute_frame_info
+
+#undef TARGET_HARD_REGNO_NREGS
+#define TARGET_HARD_REGNO_NREGS arc64_hard_regno_nregs
+
+#undef TARGET_HARD_REGNO_MODE_OK
+#define TARGET_HARD_REGNO_MODE_OK arc64_hard_regno_mode_ok
+
+#undef TARGET_MODES_TIEABLE_P
+#define TARGET_MODES_TIEABLE_P arc64_modes_tieable_p
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND arc64_print_operand
+
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS arc64_print_operand_address
+
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arc64_print_operand_punct_valid_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arc64_initialize_trampoline
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc64_asm_trampoline_template
+
+#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
+#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL arc64_function_ok_for_sibcall
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS arc64_init_libfuncs
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE arc64_output_function_prologue
+
+#undef TARGET_CONSTANT_ALIGNMENT
+#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE \
+ default_promote_function_mode_always_promote
+
+/* To be checked if it is better without it. */
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arc64_output_addr_const_extra
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS arc64_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN arc64_expand_builtin
+
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL arc64_builtin_decl
+
+/* When TLS is supported, R30 is made a fixed register as well. */
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+#endif
+
+#undef TARGET_LRA_P
+#define TARGET_LRA_P hook_bool_void_true
+
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST arc64_insn_cost
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG arc64_reorg
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE arc64_conditional_register_usage
+
+#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
+#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
+arc64_libgcc_floating_mode_supported_p
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P arc64_scalar_mode_supported_p
+
+#undef TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG arc64_split_complex_arg
+
+/* Vectors. */
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P arc64_vector_mode_supported_p
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc64_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
+ arc64_autovectorize_vector_modes
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ arc64_builtin_vectorization_cost
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST arc64_register_move_cost
+
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST arc64_vectorize_vec_perm_const
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS arc64_rtx_costs_wrapper
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST arc64_address_cost
+
+/* Scheduling. */
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P arc64_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P arc64_macro_fusion_pair_p
+
+/* Disable speculation when filling delay slots. In general we get
+ better (speed) results, but not for EEMBC's text01 benchmark.
+ Disabling delay-slot-filler speculation is needed to preserve the
+ loop body size as calculated in the machine reorg phase. For more
+ info see github issue #416. */
+#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
+#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook_bool_void_true
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM arc64_cannot_force_const_mem
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE arc64_override_options
+
+/* CC regs optimizations. */
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS arc64_fixed_condition_code_regs
+
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM CC_REGNUM
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION arc64_set_current_function
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE arc64_attribute_table
+
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arc64_allocate_stack_slots_for_args
+
+#undef TARGET_WARN_FUNC_RETURN
+#define TARGET_WARN_FUNC_RETURN arc64_warn_func_return
+
+#undef TARGET_CAN_FOLLOW_JUMP
+#define TARGET_CAN_FOLLOW_JUMP arc64_can_follow_jump
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE arc64_sched_issue_rate
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-arc64.h"
diff --git a/gcc/config/arc64/arc64.h b/gcc/config/arc64/arc64.h
new file mode 100644
index 0000000000000..e95bf2037251a
--- /dev/null
+++ b/gcc/config/arc64/arc64.h
@@ -0,0 +1,736 @@
+/* Machine description for ARC64 architecture.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_ARC64_H
+#define GCC_ARC64_H
+
+/* Bits are always numbered from the LSBit. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Is the 64bit or 32bit variant of the CPU used? */
+#define TARGET_64BIT arc64_target_64bit
+
+/* Determine TARGET_ARCH64 in all possible cases. */
+#ifdef IN_LIBGCC2
+#if defined(__ARC64_ARCH64__)
+#define TARGET_ARCH64 1
+#else
+#define TARGET_ARCH64 0
+#endif
+#else /* not IN_LIBGCC2 */
+#define TARGET_ARCH64 TARGET_64BIT
+#endif
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_ARCH64 ? 8 : 4)
+#ifndef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Width of a fp register, in bytes. */
+#define UNITS_PER_FP_REG ((arc64_fp_model == 2) ? 8 : 4)
+
+/* Maximum number of registers that can appear in a valid memory
+ address. N.B. The ld insn allows 2, but the st insn only allows
+ 1. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Addressing modes. */
+#define HAVE_PRE_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_PRE_MODIFY_DISP 1
+#define HAVE_POST_MODIFY_DISP 1
+#define HAVE_PRE_MODIFY_REG 1
+#define HAVE_POST_MODIFY_REG 1
+
+/* The number of registers used for parameter passing. Local to this
+ file. */
+#define MAX_ARC64_PARM_REGS 8
+
+/* 1 if N is a possible register number for function argument
+ passing. */
+/* Hard floats: r0-r7, and f0-f7. */
+#define FUNCTION_ARG_REGNO_P(N) \
+ (IN_RANGE ((N), R0_REGNUM, R7_REGNUM) \
+ || (ARC64_HAS_FP_BASE && IN_RANGE ((N), F0_REGNUM, F7_REGNUM)))
+
+/* Boundaries. */
+#define PARM_BOUNDARY BITS_PER_WORD
+#define STACK_BOUNDARY POINTER_SIZE
+#define FUNCTION_BOUNDARY 32
+#define EMPTY_FIELD_BOUNDARY 32
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* Look at the fundamental type that is used for a bit-field and use
+ that to impose alignment on the enclosing structure. struct s {int
+ a:8}; should have the same alignment as "int", not "char". */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Alignments. */
+#define FASTEST_ALIGNMENT BITS_PER_WORD
+/* pr64242.c is one interesting test where changing BIGGEST_ALIGNMENT
+ triggers errors. */
+#define BIGGEST_ALIGNMENT BITS_PER_WORD
+#define ARC64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \
+ (((COND) && ((ALIGN) < FASTEST_ALIGNMENT) \
+ && (TREE_CODE (EXP) == ARRAY_TYPE)) ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Align global data. */
+#define DATA_ALIGNMENT(EXP, ALIGN) \
+ ARC64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN)
+
+/* Similarly, make sure that objects on the stack are sensibly
+ aligned. */
+#define LOCAL_ALIGNMENT(EXP, ALIGN) \
+ ARC64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT (!unaligned_access)
+
+/* Default unaligned accesses. */
+#ifndef UNALIGNED_ACCESS_DEFAULT
+#define UNALIGNED_ACCESS_DEFAULT 0
+#endif
+
+/* Layout of Source Language Data Types. */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define POINTER_SIZE (TARGET_ARCH64 ? 64 : 32)
+#define LONG_TYPE_SIZE POINTER_SIZE
+
+/* Defined for convenience. */
+#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Defined by ABI. */
+#define WCHAR_TYPE "int"
+#define WCHAR_TYPE_SIZE 32
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+
+/* Specify the machine mode that the hardware addresses have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+
+#define Pmode word_mode
+
+/* Mode of a function address in a call instruction (for indexing purposes). */
+#define FUNCTION_MODE Pmode
+
+#ifdef HAVE_AS_TLS
+#define ARC64_TLS_REGNO 1
+#else
+#define ARC64_TLS_REGNO 0
+#endif
+
+/* Register usage:
+ R0-R3 Parameter/result registers
+ R4-R7 Parameter registers
+ R8-R13 Temporary registers
+ R14-R26 Callee-saved registers
+ R27 FP (frame pointer)
+ R28 SP (stack pointer)
+ R29 ILINK (Interrupt link register)
+ R30 GP/TP Global pointer; also used as the thread pointer,
+ otherwise available as a temporary register.
+ R31 BLINK (return register)
+ R32-R57 Extension registers
+ R58 ACC (accumulator)
+ R59 Reserved
+ --- Special registers ---
+ R60 sign-extended 32-bit indicator
+ R61 Reserved
+ R62 zero extended 32-bit immediate indicator
+ R63 PCL (program counter)
+ --- Floating point registers ---
+ F0 Parameter/result register
+ F1-F7 Parameter registers
+ F8-F13 Temporary registers
+ F14-F31 Callee-saved registers
+ -- Fake registers --
+ AP Argument pointer
+ SFP Soft frame pointer
+ CC Status register.
+ */
+
+/* 1 for registers that are not available for the register
+ allocator. */
+#define FIXED_REGISTERS \
+ { \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R0 - R7 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R8 - R15 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \
+ 0, 0, 0, 0, 1, 1, ARC64_TLS_REGNO, 1, /* R24 - R26, FP, SP, ILINK, R30, BLINK */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R32 - R39 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R40 - R47 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R48 - R55 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R56, R57, ACCL, R59, Specials */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - F7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F8 - F15 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F16 - F23 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F24 - F31 */ \
+ \
+ 1, 1, 1, /* AP, SFP, CC */ \
+ }
+
+/* 1 for registers not available across function calls. */
+#define CALL_USED_REGISTERS \
+ { \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \
+ 1, 1, 1, 1, 1, 1, 0, 0, /* R8 - R15 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \
+ 0, 0, 0, 0, 1, 1, 1, 1, /* R24 - R26, FP, SP, ILINK, R30, BLINK */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R32 - R39 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R40 - R47 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R48 - R55 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R56, R57, ACCL, R59, Specials */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - F7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F8 - F15 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F16 - F23 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F24 - F31 */ \
+ \
+ 1, 1, 1, /* AP, SFP, CC */ \
+ }
+
+#define REGISTER_NAMES \
+ { \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "sp", "ilink", "r30", "blink", \
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
+ "r56", "r57", "r58", "r59", "ximm", "rez", "limm", "pcl", \
+ \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
+ "ap", "sfp", "cc", \
+ }
+
+#define ADDITIONAL_REGISTER_NAMES \
+ { \
+ { "fp", 27 }, \
+ { "gp", 30 }, \
+ { "acc", 58 }, \
+ }
+
+#define EPILOGUE_USES(REGNO) (arc64_epilogue_uses (REGNO))
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. This is only true if the function
+ uses alloca. */
+#define EXIT_IGNORE_STACK (cfun->calls_alloca)
+
+#define STATIC_CHAIN_REGNUM R11_REGNUM
+#define HARD_FRAME_POINTER_REGNUM R27_REGNUM
+#define FRAME_POINTER_REGNUM SFP_REGNUM
+#define STACK_POINTER_REGNUM SP_REGNUM
+#define ARG_POINTER_REGNUM AP_REGNUM
+#define FIRST_PSEUDO_REGISTER (CC_REGNUM + 1)
+
+enum reg_class
+{
+ NO_REGS,
+ AC16_REGS,
+ SIBCALL_REGS,
+ CORE_REGS,
+ GENERAL_REGS,
+ FP_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "AC16_REGS", \
+ "SIBCALL_REGS", \
+ "CORE_REGS", \
+ "GENERAL_REGS", \
+ "FP_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x0000f00f, 0x00000000, 0x00000000, 0x00000000 }, /* AC16_REGS */ \
+ { 0x00001fff, 0x00000000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \
+ { 0x0000ffff, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
+ { 0xdfffffff, 0x0fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000007 }, /* ALL_REGS */ \
+}
+
+/* A C expression whose value is a register class containing hard
+ register REGNO. In general there is more that one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+
+#define REGNO_REG_CLASS(REGNO) arc64_regno_to_regclass[ (REGNO) ]
+
+/* A C expression that is nonzero if it is OK to rename a hard register FROM to
+ another hard register TO. */
+
+#define HARD_REGNO_RENAME_OK(FROM, TO) arc64_hard_regno_rename_ok (FROM, TO)
+
+/* The class value for valid base registers. A base register is one used in
+ an address which is the register value plus a displacement. */
+
+#define BASE_REG_CLASS GENERAL_REGS
+#define INDEX_REG_CLASS GENERAL_REGS
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ We have two registers that can be eliminated on the ARC. First, the
+ argument pointer register can always be eliminated in favor of the stack
+ pointer register or frame pointer register. Secondly, the frame pointer
+ register can often be eliminated in favor of the stack pointer register.
+*/
+
+#define ELIMINABLE_REGS \
+ { \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM } \
+ }
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = arc64_initial_elimination_offset (FROM, TO)
+
+/* RTL generation support. */
+#define INIT_EXPANDERS arc64_init_expanders ()
+
+/* Stack layout; function entry, exit and calling. */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Addresses of local variables slots are at negative offsets from the
+ frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* If defined, the maximum amount of space required for outgoing
+ arguments will be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed onto the stack
+ for each call; instead, the function prologue should increase the
+ stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset of first parameter from the argument pointer register
+ value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (MODE, arc64_use_fp_regs (MODE) ? F0_REGNUM : R0_REGNUM)
+
+/* Tell GCC to use RETURN_IN_MEMORY. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* To be checked: WORD_REGISTER_OPERATIONS; ARC64 has 32-bit
+ operations. */
+#define WORD_REGISTER_OPERATIONS 1
+
+/* Define if loading from memory in MODE, an integral mode narrower than
+ BITS_PER_WORD will either zero-extend or sign-extend. The value of this
+ macro should be the code that says which one of the two operations is
+ implicitly done, or UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) (((MODE) == SImode) ? SIGN_EXTEND : ZERO_EXTEND)
+
+/* Enable wide bitfield accesses for more efficient bitfield code. */
+#define SLOW_BYTE_ACCESS 1
+
+#define NO_FUNCTION_CSE 1
+
+/* Conditional info. */
+#define SELECT_CC_MODE(OP, X, Y) arc64_select_cc_mode (OP, X, Y)
+
+/* Restrictions apply to floating-point comparisons. */
+#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CC_FPUmode && (MODE) != CC_FPUEmode)
+
+/* Returning. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, BLINK_REGNUM)
+
+#define RETURN_ADDR_RTX arc64_return_addr
+
+/* Define this to be nonzero if shift instructions ignore all but the
+ low-order few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Defines if the CLZ result is undefined or has a useful value. */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_SIZE((MODE)) * BITS_PER_UNIT - 1, 2)
+
+/* Defines if the CTZ result is undefined or has a useful value. */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_SIZE((MODE)) * BITS_PER_UNIT - 1, 2)
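+
+/* I.e. the defined value at zero is 31 for SImode and 63 for DImode;
+   the trailing 2 indicates the value holds for the RTL expression as
+   well as for the corresponding optab. */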
+
+/* Function argument passing. */
+
+/* Define a data type for recording info about an argument list during
+ the scan of that argument list. This data type should hold all
+ necessary information about the function itself and about the args
+ processed so far, enough to enable macros such as FUNCTION_ARG to
+ determine where the next arg should go. */
+#define CUMULATIVE_ARGS struct arc64_args
+struct arc64_args
+{
+ /* Number of integer registers used so far. */
+ int iregs;
+
+ /* Number of floating-point registers used so far. */
+ int fregs;
+};
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
+ ((CUM).iregs = 0, (CUM).fregs = 0)
+
+/* An integer expression for the size in bits of the largest integer machine
+ mode that should actually be used. We allow pairs of registers. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_ARCH64 ? TImode : DImode)
+
+/* Maximum bytes moved by a single instruction (load/store pair). */
+#define MOVE_MAX (2*UNITS_PER_WORD)
+#define MAX_MOVE_MAX 16
+
+/* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */
+#define ARC64_CALL_RATIO 8
+
+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure.
+ move_by_pieces will continually copy the largest safe chunks. So a
+ 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient
+ for both size and speed of copy, so we will instead use the "cpymem"
+ standard name to implement the copy. This logic does not apply when
+ targeting -mstrict-align, so keep a sensible default in that case. */
+#define MOVE_RATIO(speed) \
+ (!STRICT_ALIGNMENT ? 2 : ((speed) ? 15 : ARC64_CALL_RATIO))
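+/* Illustration: without -mstrict-align MOVE_RATIO is 2, so any copy
+   needing more than two pieces is routed to the "cpymem" expander;
+   with -mstrict-align, move_by_pieces may emit up to 15 moves when
+   optimizing for speed and up to ARC64_CALL_RATIO moves when
+   optimizing for size before a library call is preferred.  */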
+
+#ifndef USED_FOR_TARGET
+extern const enum reg_class arc64_regno_to_regclass[];
+#endif
+
+#define SIGNED(X,V) \
+ ((unsigned long long) ((X) + (1ULL << (V - 1))) < (1ULL << V))
+#define UNSIGNED(X,V) ((unsigned long long) (X) < (1ULL << V))
+#define VERIFY_SHIFT(X,S) ((X & ((1 << S) - 1)) == 0)
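+/* Examples: SIGNED (x, 12) accepts -2048..2047, UNSIGNED (x, 6)
+   accepts 0..63, and VERIFY_SHIFT (x, 2) requires the two least
+   significant bits of X to be clear.  */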
+
+#define UNSIGNED_INT3(X) (UNSIGNED(X,3))
+#define UNSIGNED_INT5(X) (UNSIGNED(X,5))
+#define UNSIGNED_INT6(X) (UNSIGNED(X,6))
+#define UNSIGNED_INT7(X) (UNSIGNED(X,7))
+#define UNSIGNED_INT8(X) (UNSIGNED(X,8))
+#define UNSIGNED_INT9(X) (UNSIGNED(X,9))
+#define UNSIGNED_INT10(X) (UNSIGNED(X,10))
+#define UNSIGNED_INT12(X) (UNSIGNED(X,12))
+#define UNSIGNED_INT16(X) (UNSIGNED(X,16))
+// TODO: Fix for 32 bit compiler host architecture.
+#define UNSIGNED_INT32(X) (UNSIGNED(X,32))
+
+#define SIGNED_INT3(X) (SIGNED(X,3))
+#define SIGNED_INT6(X) (SIGNED(X,6))
+#define SIGNED_INT7(X) (SIGNED(X,7))
+#define SIGNED_INT8(X) (SIGNED(X,8))
+#define SIGNED_INT9(X) (SIGNED(X,9))
+#define SIGNED_INT10(X) (SIGNED(X,10))
+#define SIGNED_INT11(X) (SIGNED(X,11))
+#define SIGNED_INT12(X) (SIGNED(X,12))
+#define SIGNED_INT13(X) (SIGNED(X,13))
+#define SIGNED_INT16(X) (SIGNED(X,16))
+#define SIGNED_INT21(X) (SIGNED(X,21))
+#define SIGNED_INT25(X) (SIGNED(X,25))
+
+// TODO: Fix for 32 bit compiler host architecture.
+#define SIGNED_INT32(X) (SIGNED(X,32))
+
+#define UNSIGNED_INT7_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S))
+#define UNSIGNED_INT8_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S))
+#define UNSIGNED_INT9_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S))
+
+#define SIGNED_INT13_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S))
+#define SIGNED_INT14_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S))
+#define SIGNED_INT15_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S))
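+/* The shifted variants describe scaled immediate fields: the value
+   must be aligned to 1 << S and fit the base field once shifted right
+   by S.  For example, UNSIGNED_INT9_SHIFTED (x, 3) accepts multiples
+   of 8 in the range 0..504, i.e. a u6 field scaled by 8.  */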
+
+#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X))
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GP_REGNUM_P (REGNO) \
+ || ((REGNO) == AP_REGNUM) \
+ || ((REGNO) == SFP_REGNUM))
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO)
+
+/* Return true if regno is FP register. */
+#define FP_REGNUM_P(REGNO) \
+ (((unsigned) (REGNO - F0_REGNUM)) <= (F31_REGNUM - F0_REGNUM))
+
+#define GP_REGNUM_P(REGNO) \
+ (((unsigned) (REGNO - R0_REGNUM)) <= (BLINK_REGNUM - R0_REGNUM))
+
+/* Trampolines, used for entering nested functions, are a block of code
+ followed by two pointers. The sizes here are in bytes. */
+#define TRAMPOLINE_CODE_SIZE \
+ ((Pmode == SImode) \
+ ? 8 /* ld_s, ld, j_s */ \
+ : 16) /* nop, ldl, ldl, j */
+#define TRAMPOLINE_SIZE (TRAMPOLINE_CODE_SIZE + 2 * POINTER_BYTES)
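+/* For example, with 64-bit pointers (Pmode == DImode) this amounts to
+   16 bytes of code plus two 8-byte pointers, i.e. 32 bytes.  */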
+/* Alignment required for a trampoline, in bits.  */
+#define TRAMPOLINE_ALIGNMENT POINTER_SIZE
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_CPU_CPP_BUILTINS() arc64_cpu_cpp_builtins (pfile)
+
+/* Dispatch tables. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+#define CASE_VECTOR_MODE SImode
+#define CASE_VECTOR_PC_RELATIVE 1
+#define ADDR_VEC_ALIGN(VEC_INSN) 0
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ { \
+ (MODE) = SImode; \
+ }
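+/* For example, a char or short scalar held in a register is widened to
+   SImode; SImode and wider modes are left as they are.  */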
+
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will
+ end at the end of the line. */
+#define ASM_COMMENT_START "#"
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ fprintf(FILE, "\t.align\t%d\n", 1 << (int)LOG)
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#undef ASM_APP_ON
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#undef ASM_APP_OFF
+#define ASM_APP_OFF ""
+
+/* This is how to output a reference to a symbol_ref / label_ref as
+ (part of) an operand. To disambiguate from register names like a1
+ / a2 / status etc, symbols are preceded by '@'. */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+ ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0))
+#define ASM_OUTPUT_LABEL_REF(FILE,STR) \
+ do \
+ { \
+ fputs ("@", (FILE)); \
+ assemble_name ((FILE), (STR)); \
+ } \
+ while (0)
+
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to output an element of a PIC case-vector. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ fprintf (STREAM, "\tb\t@%sL%d\n", \
+ LOCAL_LABEL_PREFIX, VALUE)
+
+/* elfos.h defines this to also emit an .align; we don't want that.  */
+#undef ASM_OUTPUT_CASE_LABEL
+
+/* Section selection. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+#define TEXT_SECTION_ASM_OP "\t.section\t.text"
+#define DATA_SECTION_ASM_OP "\t.section\t.data"
+
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#define SDATA_SECTION_ASM_OP "\t.section\t.sdata"
+#define SBSS_SECTION_ASM_OP "\t.section\t.sbss"
+
+/* Expression whose value is a string, including spacing, containing
+ the assembler operation to identify the following data as
+ initialization/termination code. If not defined, GCC will assume
+ such a section does not exist. */
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#define FINI_SECTION_ASM_OP "\t.section\t.fini"
+
+/* All the work is done in PROFILE_HOOK, but this definition is still
+   required.  */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+
+#define NO_PROFILE_COUNTERS 1
+
+/* Tell crtstuff.c we're using ELF. */
+#define OBJECT_FORMAT_ELF
+
+/* Called by crtstuff.c to emit, in the section given by SECTION_OP, a call
+   to function FUNCTION, and then to switch back to the text section. */
+#undef CRT_CALL_STATIC_FUNCTION
+#ifdef __ARC64_ARCH32__
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "mov\tr12,@" USER_LABEL_PREFIX #FUNC "\n\t" \
+ "jl\t[r12]\n" \
+ TEXT_SECTION_ASM_OP);
+#elif (defined __ARC64_ARCH64__)
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "addl\tr12,pcl,@" USER_LABEL_PREFIX #FUNC "@pcl\n\t" \
+ "jl\t[r12]\n" \
+ TEXT_SECTION_ASM_OP);
+#endif
+
+/* ATOMIC options. */
+/* FIXME: is 0 okay or should it be -1 like DEFAULT_arc_mpy_option? */
+/* Default atomic option value. */
+#undef DEFAULT_ARC64_ATOMIC_OPTION
+#define DEFAULT_ARC64_ATOMIC_OPTION 1
+
+#define ARC64_HAS_ATOMIC_1 (arc64_atomic_option > 0)
+#define ARC64_HAS_ATOMIC_2 (arc64_atomic_option > 1)
+#define ARC64_HAS_ATOMIC_3 (arc64_atomic_option > 2)
+
+/* DIVREM options. */
+#undef TARGET_ARC64_DIVREM_DEFAULT
+#define TARGET_ARC64_DIVREM_DEFAULT 1
+
+/* FP options. */
+#define ARC64_HAS_FP_BASE (arc64_fp_model > 0)
+#define ARC64_HAS_FPUH (arc64_fp_model > 0)
+#define ARC64_HAS_FPUS (arc64_fp_model > 0)
+#define ARC64_HAS_FPUD (arc64_fp_model > 1)
+
+#define TARGET_HARD_FLOAT ARC64_HAS_FP_BASE
+
+/* Vector SIMD length. */
+#define ARC64_VFP_32 (arc64_fp_model == 1)
+#define ARC64_VFP_64 ((arc64_fp_model == 2) && !TARGET_WIDE_SIMD)
+#define ARC64_VFP_128 ((arc64_fp_model == 2) && TARGET_WIDE_SIMD)
+
+/* IFCVT macros. */
+#define STORE_FLAG_VALUE 1
+#define MAX_CONDITIONAL_EXECUTE 12
+#define BRANCH_COST(speed_p, predictable_p) 10
+
+/* DWARF macros. */
+#define DWARF2_DEBUGGING_INFO 1
+/* The mapping from gcc register number to DWARF2 CFA column number. */
+#define DWARF_FRAME_REGNUM(REGNO) DBX_REGISTER_NUMBER(REGNO)
+/* DWARF2 CFA column which tracks the return address. */
+#define DWARF_FRAME_RETURN_COLUMN BLINK_REGNUM
+/* DWARF registers encodings. */
+#define DBX_REGISTER_NUMBER(REGNO) arc64_dbx_register_number (REGNO)
+/* The DWARF 2 CFA column which tracks the return address from a signal handler
+   context.  This value must not correspond to a hard register and must be out
+   of the range of DWARF_FRAME_REGNUM ().  The unwind-dw2.c file uses the
+   DWARF_REG_TO_UNWIND_COLUMN and DWARF_FRAME_REGISTERS macros.  The
+   DWARF_FRAME_REGNUM macro returns no equivalent DWARF register for
+   AP_REGNUM, so we should be safe using AP_REGNUM.  */
+#define DWARF_ALT_FRAME_RETURN_COLUMN AP_REGNUM
+
+/* Exception Handling support. */
+/* Use R0 through R3 to pass exception handling information. */
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX arc64_eh_return_handler_rtx ()
+#define EH_USES(REGNO) (arc64_eh_uses((REGNO)))
+
+/* Select a format to encode pointers in exception handling data. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ arc64_asm_preferred_eh_data_format ((CODE), (GLOBAL))
+
+/* Specs. */
+
+/* Support for a compile-time default CPU or FPU. */
+#define OPTION_DEFAULT_SPECS \
+ { "fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \
+ { "cpu", "%{!mcpu=*:-mcpu=%(VALUE)}"}
+
+#define CPP_SPEC "%(subtarget_cpp_spec)"
+
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{mcpu=*:-mcpu=%*}"
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#undef ARC64_SUBTARGET_DEFAULT
+#define ARC64_SUBTARGET_DEFAULT 0
+
+#endif /* GCC_ARC64_H */
diff --git a/gcc/config/arc64/arc64.md b/gcc/config/arc64/arc64.md
new file mode 100644
index 0000000000000..1eaee6fb05277
--- /dev/null
+++ b/gcc/config/arc64/arc64.md
@@ -0,0 +1,3238 @@
+;; Register numbers
+(define_constants
+ [
+ (R0_REGNUM 0)
+ (R1_REGNUM 1)
+ (R2_REGNUM 2)
+ (R3_REGNUM 3)
+ (R4_REGNUM 4)
+ (R5_REGNUM 5)
+ (R6_REGNUM 6)
+ (R7_REGNUM 7)
+ (R8_REGNUM 8)
+ (R9_REGNUM 9)
+ (R10_REGNUM 10)
+ (R11_REGNUM 11)
+ (R12_REGNUM 12)
+ (R13_REGNUM 13)
+ (R14_REGNUM 14)
+ (R15_REGNUM 15)
+ (R16_REGNUM 16)
+ (R17_REGNUM 17)
+ (R18_REGNUM 18)
+ (R19_REGNUM 19)
+ (R20_REGNUM 20)
+ (R21_REGNUM 21)
+ (R22_REGNUM 22)
+ (R23_REGNUM 23)
+ (R24_REGNUM 24)
+ (R25_REGNUM 25)
+ (R26_REGNUM 26)
+ (R27_REGNUM 27)
+ (SP_REGNUM 28)
+ (ILINK_REGNUM 29)
+ (R30_REGNUM 30)
+ (BLINK_REGNUM 31)
+ (R32_REGNUM 32)
+ (R33_REGNUM 33)
+ (R34_REGNUM 34)
+ (R35_REGNUM 35)
+ (R36_REGNUM 36)
+ (R37_REGNUM 37)
+ (R38_REGNUM 38)
+ (R39_REGNUM 39)
+ (R40_REGNUM 40)
+ (R41_REGNUM 41)
+ (R42_REGNUM 42)
+ (R43_REGNUM 43)
+ (R44_REGNUM 44)
+ (R45_REGNUM 45)
+ (R46_REGNUM 46)
+ (R47_REGNUM 47)
+ (R48_REGNUM 48)
+ (R49_REGNUM 49)
+ (R50_REGNUM 50)
+ (R51_REGNUM 51)
+ (R52_REGNUM 52)
+ (R53_REGNUM 53)
+ (R54_REGNUM 54)
+ (R55_REGNUM 55)
+ (R56_REGNUM 56)
+ (R57_REGNUM 57)
+ (R58_REGNUM 58)
+ (R59_REGNUM 59)
+
+ (R60_REGNUM 60)
+ (R61_REGNUM 61)
+ (R62_REGNUM 62)
+ (R63_REGNUM 63)
+
+ (F0_REGNUM 64)
+ (F1_REGNUM 65)
+ (F2_REGNUM 66)
+ (F3_REGNUM 67)
+ (F4_REGNUM 68)
+ (F5_REGNUM 69)
+ (F6_REGNUM 70)
+ (F7_REGNUM 71)
+ (F8_REGNUM 72)
+ (F9_REGNUM 73)
+ (F10_REGNUM 74)
+ (F11_REGNUM 75)
+ (F12_REGNUM 76)
+ (F13_REGNUM 77)
+ (F14_REGNUM 78)
+ (F15_REGNUM 79)
+ (F16_REGNUM 80)
+ (F17_REGNUM 81)
+ (F18_REGNUM 82)
+ (F19_REGNUM 83)
+ (F20_REGNUM 84)
+ (F21_REGNUM 85)
+ (F22_REGNUM 86)
+ (F23_REGNUM 87)
+ (F24_REGNUM 88)
+ (F25_REGNUM 89)
+ (F26_REGNUM 90)
+ (F27_REGNUM 91)
+ (F28_REGNUM 92)
+ (F29_REGNUM 93)
+ (F30_REGNUM 94)
+ (F31_REGNUM 95)
+
+ (AP_REGNUM 96)
+ (SFP_REGNUM 97)
+ (CC_REGNUM 98)
+ ]
+ )
+
+(define_c_enum "unspec"
+ [
+ ARC64_UNSPEC_PCREL
+ ARC64_UNSPEC_GOT
+ ARC64_UNSPEC_GOT32
+ ARC64_UNSPEC_TLS_GD
+ ARC64_UNSPEC_TLS_IE
+ ARC64_UNSPEC_TLS_OFF
+ ARC64_VUNSPEC_BLOCKAGE
+
+ ARC64_VUNSPEC_LR
+ ARC64_VUNSPEC_SR
+ ARC64_VUNSPEC_LRL
+ ARC64_VUNSPEC_SRL
+ ARC64_VUNSPEC_FLAG
+ ARC64_VUNSPEC_BRK
+ ARC64_VUNSPEC_NOP
+ ARC64_VUNSPEC_TRAP_S
+
+ ARC64_VUNSPEC_EX
+ ARC64_VUNSPEC_CAS
+ ARC64_VUNSPEC_SC
+ ARC64_VUNSPEC_LL
+ ARC64_VUNSPEC_SYNC
+ ARC64_VUNSPEC_ATOOPS
+ ARC64_VUNSPEC_RTIE
+
+ ARC64_UNSPEC_MEMBAR
+ ARC64_UNSPEC_FLS
+ ARC64_UNSPEC_COPYSIGN
+ ARC64_UNSPEC_XORSIGN
+ ARC64_UNSPEC_ROUND
+ ARC64_UNSPEC_BTRUNC
+ ARC64_UNSPEC_CASESI
+ ARC64_UNSPEC_VECINIT
+ ARC64_UNSPEC_QMPYH
+ ARC64_UNSPEC_QMACH
+ ARC64_UNSPEC_DMPYWH
+ ARC64_UNSPEC_DMPYWHU
+ ARC64_UNSPEC_DMACWH
+ ARC64_UNSPEC_DMACWHU
+ ARC64_UNSPEC_VPACK4HL
+ ARC64_UNSPEC_VPACK4HM
+ ARC64_UNSPEC_VPACK2WL
+ ARC64_UNSPEC_SWAPL
+ ARC64_UNSPEC_SWAP
+ ARC64_UNSPEC_VEC_SHR
+ ARC64_UNSPEC_VEC_SHL
+ ARC64_UNSPEC_HEXCH
+ ARC64_UNSPEC_SEXCH
+ ARC64_UNSPEC_DEXCH
+ ARC64_UNSPEC_HUNPKL
+ ARC64_UNSPEC_SUNPKL
+ ARC64_UNSPEC_DUNPKL
+ ARC64_UNSPEC_HUNPKM
+ ARC64_UNSPEC_SUNPKM
+ ARC64_UNSPEC_DUNPKM
+ ARC64_UNSPEC_HPACKL
+ ARC64_UNSPEC_SPACKL
+ ARC64_UNSPEC_DPACKL
+ ARC64_UNSPEC_HPACKM
+ ARC64_UNSPEC_SPACKM
+ ARC64_UNSPEC_DPACKM
+ ARC64_UNSPEC_HBFLYL
+ ARC64_UNSPEC_SBFLYL
+ ARC64_UNSPEC_DBFLYL
+ ARC64_UNSPEC_HBFLYM
+ ARC64_UNSPEC_SBFLYM
+ ARC64_UNSPEC_DBFLYM
+ ARC64_UNSPEC_VFADDSUB
+ ARC64_UNSPEC_VFSUBADD
+ ARC64_UNSPEC_VADDSUB
+ ARC64_UNSPEC_VSUBADD
+ ])
+
+(include "constraints.md")
+(include "predicates.md")
+
+;; -------------------------------------------------------------------
+;; Mode Iterators
+;; -------------------------------------------------------------------
+
+;; Iterator for General Purpose Integer registers (32- and 64-bit modes)
+(define_mode_iterator GPI [SI (DI "TARGET_64BIT")])
+
+;; For doubling width of an integer mode
+(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
+;; Iterator for QI and HI modes
+(define_mode_iterator SHORT [QI HI])
+
+;; Iterator for QI HI and SI modes
+(define_mode_iterator EXT [QI HI SI])
+
+;; Iterator for all integer modes (up to 64-bit)
+(define_mode_iterator ALLI [QI HI SI (DI "TARGET_64BIT")])
+(define_mode_iterator MV_ALLI [QI HI SI (DI "TARGET_64BIT || TARGET_LL64")])
+
+;; Iterator for HI SI and DI modes
+(define_mode_iterator EPI [HI SI (DI "TARGET_64BIT")])
+
+;; Iterator for HI and SI modes
+(define_mode_iterator HI_SI [HI SI])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; Iterator for integer modes which map into a pair of registers.
+(define_mode_iterator DBLI [DI (TI "TARGET_64BIT")])
+
+;; Iterator for General Purpose Floating-point registers (16-, 32-
+;; and 64-bit modes)
+(define_mode_iterator GPF_HF [(HF "ARC64_HAS_FPUH")
+ (SF "ARC64_HAS_FPUS") (DF "ARC64_HAS_FPUD")])
+
+;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
+(define_mode_iterator GPF [(SF "ARC64_HAS_FPUS") (DF "ARC64_HAS_FPUD")])
+
+;; Iterator for General Purpose Floating-point registers (16- and 32-bit modes)
+(define_mode_iterator HF_SF [(HF "ARC64_HAS_FPUH") (SF "ARC64_HAS_FPUS")])
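+
+;; For instance, a pattern written with the GPF_HF iterator expands
+;; into HFmode, SFmode and DFmode variants, each enabled only when the
+;; corresponding ARC64_HAS_FPUH/FPUS/FPUD condition holds.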
+
+;; All int vectors
+(define_mode_iterator VALL [V2HI V4HI V2SI])
+
+;; All 64b int vectors
+(define_mode_iterator V64I [V4HI V2SI])
+
+;; All fp vectors
+(define_mode_iterator VALLF [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")
+ (V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")
+ (V2DF "ARC64_VFP_128")])
+
+;; All fp vectors up to 64-bit
+(define_mode_iterator VALLF_64 [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")])
+
+;; All 128-bit fp vectors
+(define_mode_iterator VALLF_128 [(V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")
+ (V2DF "ARC64_VFP_128")])
+
+;; All 2xfp Vectors
+(define_mode_iterator V2xF [(V2HF "ARC64_VFP_32") (V2SF "ARC64_VFP_64")
+ (V2DF "ARC64_VFP_128")])
+
+;; All 4xfp Vectors
+(define_mode_iterator V4xF [(V4HF "ARC64_VFP_64") (V4SF "ARC64_VFP_128")])
+
+;; All fp vectors that are two registers wide
+(define_mode_iterator W2xF [(V2DF "ARC64_VFP_128")])
+
+;; All HF and SF vectors
+(define_mode_iterator V1FRF [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")
+ (V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")])
+
+;; All HF vectors
+(define_mode_iterator VxHF [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64")
+ (V8HF "ARC64_VFP_128")])
+
+;; -------------------------------------------------------------------
+;; Code Iterators
+;; -------------------------------------------------------------------
+
+;; Code iterator for sign/zero extension
+(define_code_iterator ANY_EXTEND [sign_extend zero_extend])
+
+;; This code iterator allows the shifts supported in arithmetic instructions
+(define_code_iterator ASHIFT [ashift ashiftrt lshiftrt])
+
+;; Only logical shifts
+(define_code_iterator LSHIFT [ashift lshiftrt])
+
+;; Iterates over the SETcc instructions
+(define_code_iterator SETCC [eq ne gt lt ge le ltu geu])
+(define_code_iterator ALLCC [eq ne gt lt ge le ltu geu gtu leu])
+
+;; Three operand arithmetic operations
+(define_code_iterator ARITH [plus minus mult])
+(define_code_iterator ADDSUB [plus minus] )
+
+;; Three operand logic operations
+(define_code_iterator LOGIC [and ior xor smin smax])
+
+;; Two operand logic operations
+(define_code_iterator NOT_ABS [not abs])
+
+;; Two operand logic operations extended, used for zero_extend
+;; patterns
+(define_code_iterator LOP2EX [not abs neg])
+
+;; Min/Max iterator
+(define_code_iterator MINMAX [smin smax])
+
+;; Three operand floating point arithmetic instructions
+(define_code_iterator DOPF [plus minus mult div smin smax])
+
+;; Vector operations
+(define_code_iterator VOPS [plus minus mult div])
+
+;; Commutative VF operations
+(define_code_iterator VCOP [plus mult])
+
+;; Emulated 1 operand vector operations
+(define_code_iterator ABS_NEG [abs neg])
+
+;; Code iterator for unary negate and bitwise complement.
+(define_code_iterator NEG_NOT [neg not])
+
+;; Code iterator for bit logic ops.
+(define_code_iterator BIT [ior xor])
+
+;; Code iterator for div/mod ops.
+(define_code_iterator DIVREM [div udiv mod umod])
+
+;; Commutative operations
+(define_code_iterator COMMUTATIVE [and ior xor])
+(define_code_iterator COMMUTATIVEF [plus and ior xor])
+
+;; -------------------------------------------------------------------
+;; Mode Attributes
+;; -------------------------------------------------------------------
+
+;; Map rtl mode to ARC mnemonic suffixes used in sign extend
+;; instructions.
+(define_mode_attr exttab [(QI "b") (HI "h") (SI "w")])
+
+;; Map rtl mode to ARC mnemonic suffixes
+(define_mode_attr sfxtab [(QI "b") (HI "h") (SI "") (DI "l")
+ (HF "h") (SF "s") (DF "d")
+ (V2HI "2h") (V4HI "4h") (V2SI "2")
+ (V2HF "h") (V4HF "h") (V2SF "s")
+ (V8HF "h") (V4SF "s") (V2DF "d")])
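+;; For example, a DImode operation gets the "l" suffix (ldl, stl, movl)
+;; while SImode uses the bare mnemonic.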
+
+;; Used by FPABS patterns.
+(define_mode_attr fptab [(SF "") (DF "l")])
+
+;; Same as above, but used by the conditional move patterns
+(define_mode_attr mcctab [(QI "") (HI "") (SI "") (DI "l")
+ (HF "") (SF "") (DF "l")
+ (V2HI "") (V4HI "l") (V2SI "l")
+ (V2HF "") (V4HF "l") (V2SF "l")])
+
+(define_mode_attr slfp [(HF "h") (SF "") (DF "l")
+ (V2HF "") (V4HF "l") (V2SF "l")])
+
+(define_mode_attr fmvftab [(HF "s") (SF "s") (DF "d")
+ (V2HF "s") (V4HF "d") (V2SF "d")])
+(define_mode_attr fmvitab [(HF "i") (SF "i") (DF "l")
+ (V2HF "i") (V4HF "l") (V2SF "l")])
+
+;; To be used by vector exch instructions emitted by reduction
+;; patterns.
+(define_mode_attr fmextab [(V4HF "s") (V4SF "d")])
+
+;; Used to implement cadd{90,270} functions
+(define_mode_attr cplxtab [(V2HF "H")
+ (V4HF "H")
+ (V2SF "S")
+ (V8HF "H")
+ (V4SF "S")
+ (V2DF "D")])
+
+;; Give the number of bits minus one in the mode
+(define_mode_attr sizen [(QI "7") (HI "15") (SI "31") (DI "63")
+ (HF "15") (SF "31") (DF "63")])
+
+;; Same as above but without the -1; used for fp loads/stores
+(define_mode_attr sizef [(HF "16") (SF "32") (DF "64")
+ (V2HF "32") (V4HF "64") (V2SF "64")
+ (V8HF "d64") (V4SF "d64") (V2DF "d64")])
+
+;; Used to implement predicated sign extension patterns
+(define_mode_attr sexsft [(QI "24") (HI "16") (SI "8")])
+
+;; Used by float conv patterns.
+(define_mode_attr f2tab [(SI "int") (DI "l")])
+
+;; Define element mode for each vector mode.
+(define_mode_attr VEL [(V2HI "HI") (V4HI "HI") (V2SI "SI")
+ (V2HF "HF") (V4HF "HF") (V2SF "SF")
+ (V8HF "HF") (V4SF "SF") (V2DF "DF")])
+(define_mode_attr vel [(V2HI "hi") (V4HI "hi") (V2SI "si")
+ (V2HF "hf") (V4HF "hf") (V2SF "sf")
+ (V8HF "hf") (V4SF "sf") (V2DF "df")])
+
+;; Define the element mode for each double-register mode.
+(define_mode_attr REL [(DI "SI") (TI "DI")])
+(define_mode_attr rel [(DI "si") (TI "di")])
+
+;; Used by vector extract pattern
+(define_mode_attr vextrsz [(V2HI "16") (V4HI "16") (V2SI "32")])
+(define_mode_attr vextrmsk [(V2HI "0x1f") (V4HI "0x3f") (V2SI "0x3f")])
+(define_mode_attr vextrsh [(V2HI "5") (V4HI "6") (V2SI "6")])
+
+;; -------------------------------------------------------------------
+;; Code Attributes
+;; -------------------------------------------------------------------
+;; Map rtl objects to optab names
+(define_code_attr optab [(ashift "ashl")
+ (ashiftrt "ashr")
+ (lshiftrt "lshr")
+ (rotatert "rotr")
+ (sign_extend "extend")
+ (zero_extend "zero_extend")
+ (sign_extract "extv")
+ (zero_extract "extzv")
+ (fix "fix")
+ (unsigned_fix "fixuns")
+ (float "float")
+ (unsigned_float "floatuns")
+ (popcount "popcount")
+ (and "and")
+ (ior "ior")
+ (xor "xor")
+ (not "one_cmpl")
+ (neg "neg")
+ (plus "add")
+ (minus "sub")
+ (mult "mul")
+ (div "div")
+ (udiv "udiv")
+ (mod "mod")
+ (umod "umod")
+ (ss_plus "qadd")
+ (us_plus "qadd")
+ (ss_minus "qsub")
+ (us_minus "qsub")
+ (ss_neg "qneg")
+ (ss_abs "qabs")
+ (smin "smin")
+ (smax "smax")
+ (umin "umin")
+ (umax "umax")
+ (eq "eq")
+ (ne "ne")
+ (lt "lt")
+ (ge "ge")
+ (le "le")
+ (gt "gt")
+ (ltu "ltu")
+ (leu "leu")
+ (geu "geu")
+ (gtu "gtu")
+ (abs "abs")
+ (sqrt "sqrt")])
+
+;; Map rtl to ARC's cc-mnemonic names; slightly different from above.
+(define_code_attr cctab [(eq "eq")
+ (ne "ne")
+ (lt "lt")
+ (ge "ge")
+ (le "le")
+ (gt "gt")
+ (ltu "lo")
+ (leu "NA")
+ (geu "hs")
+ (gtu "NA")])
+
+;; Used for inverting predicated SET instructions.
+(define_code_attr CCTAB [(eq "EQ")
+ (ne "NE")
+ (lt "LT")
+ (ge "GE")
+ (le "LE")
+ (gt "GT")
+ (ltu "LTU")
+ (leu "NA")
+ (geu "GEU")
+ (gtu "NA")])
+
+;; Sign- or zero-extend data-op
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; Optab prefix for sign/zero-extending operations
+(define_code_attr su_optab [(sign_extend "") (zero_extend "u")])
+
+;; Map rtl objects to arc instruction names
+(define_code_attr mntab [(abs "abs")
+ (not "not")
+ (neg "neg")
+ (ashift "asl")
+ (ashiftrt "asr")
+ (sign_extend "sex")
+ (zero_extend "ext")
+ (div "div")
+ (udiv "divu")
+ (mult "mul")
+ (mod "rem")
+ (umod "remu")
+ (lshiftrt "lsr")
+ (and "and")
+ (ior "or")
+ (xor "xor")
+ (plus "add")
+ (minus "sub")
+ (smax "max")
+ (smin "min")])
+
+;; Map rtl objects to arc's bit operation instructions
+(define_code_attr bit_optab [(ior "bset")
+ (xor "bxor")])
+
+;; -------------------------------------------------------------------
+;; Int Iterators.
+;; -------------------------------------------------------------------
+(define_int_iterator PERMUTED [ARC64_UNSPEC_DUNPKL
+ ARC64_UNSPEC_DUNPKM
+ ARC64_UNSPEC_DPACKL
+ ARC64_UNSPEC_DPACKM
+ ARC64_UNSPEC_DBFLYL
+ ARC64_UNSPEC_DBFLYM])
+(define_int_iterator PERMUTES [ARC64_UNSPEC_SUNPKL
+ ARC64_UNSPEC_SUNPKM
+ ARC64_UNSPEC_SPACKL
+ ARC64_UNSPEC_SPACKM
+ ARC64_UNSPEC_SBFLYL
+ ARC64_UNSPEC_SBFLYM])
+(define_int_iterator PERMUTEH [ARC64_UNSPEC_HUNPKL
+ ARC64_UNSPEC_HUNPKM
+ ARC64_UNSPEC_HPACKL
+ ARC64_UNSPEC_HPACKM
+ ARC64_UNSPEC_HBFLYL
+ ARC64_UNSPEC_HBFLYM])
+
+;; -------------------------------------------------------------------
+;; Int Iterators Attributes.
+;; -------------------------------------------------------------------
+(define_int_attr perm_pat [(ARC64_UNSPEC_HUNPKL "unpkl")
+ (ARC64_UNSPEC_SUNPKL "unpkl")
+ (ARC64_UNSPEC_DUNPKL "unpkl")
+ (ARC64_UNSPEC_HUNPKM "unpkm")
+ (ARC64_UNSPEC_SUNPKM "unpkm")
+ (ARC64_UNSPEC_DUNPKM "unpkm")
+ (ARC64_UNSPEC_HPACKL "packl")
+ (ARC64_UNSPEC_SPACKL "packl")
+ (ARC64_UNSPEC_DPACKL "packl")
+ (ARC64_UNSPEC_HPACKM "packm")
+ (ARC64_UNSPEC_SPACKM "packm")
+ (ARC64_UNSPEC_DPACKM "packm")
+ (ARC64_UNSPEC_HBFLYL "bflyl")
+ (ARC64_UNSPEC_SBFLYL "bflyl")
+ (ARC64_UNSPEC_DBFLYL "bflyl")
+ (ARC64_UNSPEC_HBFLYM "bflym")
+ (ARC64_UNSPEC_SBFLYM "bflym")
+ (ARC64_UNSPEC_DBFLYM "bflym")])
+
+;; -------------------------------------------------------------------
+;; Instruction types and attributes
+;; -------------------------------------------------------------------
+
+;; What is the insn_cost for this insn?  The target hook can still
+;; override this.  When optimizing for size, the "length" attribute is
+;; used instead.
+(define_attr "cost" "" (const_int 0))
+
+(define_attr "type" "abs, adc, adcl, add, addhl, addl, and, andl, asl,
+asll, asr, asrl, atldlop, atldop, bbit, bclr, bi, bic, bl, block,
+bmsk, branch, branchcc, brcc, brk, bset, bsetl, btst, bxor, bxorl,
+cmp, dbnz, div, divl, dmb, dmpywh, ex, ext, fadd, fcmp, fd2s, fdiv,
+ffs, fh2s, flag, fls, fmadd, fmax, fmin, fmov, fmsub, fmul, fnmadd,
+fnmsub, fp2int, fp2uint, frnd, fs2d, fs2h, fsgnj, fsgnjn, fsgnjx,
+fsqrt, fsub, int2fp, jl, jump, ld, llock, lr, lsr, lsrl, mac, max,
+maxl, min, minl, mod, modl, move, movecc, mpy, mpyl, neg, nop, norm,
+normh, norml, not, notl, or, orl, qmach, qmpyh, return, rol, ror,
+rtie, sbc, sbcl, scond, setcc, sex, sr, st, sub, subl, swap, swape,
+swapel, swapl, sync, trap, tst, udiv, udivl, uint2fp, umod, umodl,
+unknown, vadd, vaddsub, vfadd, vfaddsub, vfbflyl, vfbflym, vfdiv,
+vfexch, vfext, vfins, vfmul, vfpackl, vfpackm, vfrep, vfsub, vfsubadd,
+vfunpkl, vfunpkm, vmac2h, vmpy2h, vpack, vsub, vsubadd, xbfu, xor,
+xorl"
+ (const_string "unknown"))
+
+(define_attr "iscompact" "yes,no,maybe" (const_string "no"))
+
+(define_attr "predicable" "yes,no" (const_string "no"))
+
+(define_attr "length" ""
+ (cond
+ [(eq_attr "iscompact" "yes")
+ (const_int 2)
+
+ (eq_attr "type" "ld")
+ (if_then_else
+ (match_operand 1 "limm_ldst_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "st")
+ (if_then_else
+ (ior (match_operand 0 "limm_ldst_operand" "")
+ (and (not (match_operand 1 "S06S0_immediate_operand" ""))
+ (match_operand 1 "immediate_operand" "")))
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "bl")
+ (if_then_else
+ (ior (match_operand 0 "plt34_symbol_p" "")
+ (match_operand 1 "plt34_symbol_p" ""))
+ (const_int 6) (const_int 4))
+
+ (eq_attr "iscompact" "maybe")
+ (cond
+ [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
+ (const_int 4)
+
+ (eq_attr "type" "and")
+ (const_int 2)
+
+ (eq_attr "type" "or")
+ (const_int 2)
+
+ (match_operand:DI 0 "" "")
+ (const_int 4)
+ ]
+ (const_int 2))
+ ]
+ (const_int 8)))
+
+;; Select various CPU features.
+(define_attr "cpu_facility" "std,cd,ncd"
+ (const_string "std"))
+
+(define_attr "enabled" "no,yes"
+ (cond [(and (eq_attr "cpu_facility" "cd")
+ (not (match_test ("TARGET_CODE_DENSITY"))))
+ (const_string "no")
+ (and (eq_attr "cpu_facility" "ncd")
+ (match_test ("TARGET_CODE_DENSITY")))
+ (const_string "no")
+ ]
+ (const_string "yes")))
+
+;; -------------------------------------------------------------------
+;; Delay slots
+;; -------------------------------------------------------------------
+
+;; Define what can go in a delay slot, generic.
+(define_attr "slottable" "false,true"
+ (cond
+ [(eq_attr "type" "jump,branch,jl,bl,bi,branchcc,dbnz,return,bbit,brcc")
+ (const_string "false")
+
+ (eq_attr "length" "2,4")
+ (const_string "true")
+ ]
+ (const_string "false")))
+
+;; Define what can go in a call delay slot.
+(define_attr "call_slottable" "false,true"
+ (cond
+ [(eq_attr "slottable" "false")
+ (const_string "false")
+
+ (match_test "regno_clobbered_p (BLINK_REGNUM, insn, Pmode, 1)")
+ (const_string "false")
+ ]
+ (const_string "true")))
+
+;; Calls delay slots
+(define_delay (and (eq_attr "type" "jl,bl,return")
+ (eq_attr "length" "2,4,8"))
+ [(eq_attr "call_slottable" "true") (nil) (nil)])
+
+;; Jumps delay slots
+(define_delay (ior (eq_attr "type" "jump,branch,branchcc,dbnz,bbit")
+;; According to the PRM, jumps with LIMM and delay slots are illegal.
+ (and (eq_attr "type" "brcc")
+ (eq_attr "length" "4,12")))
+ [(eq_attr "slottable" "true") (nil) (nil)])
+
+;; Is there an instruction that we are actually putting into the delay
+;; slot?  N.B. Until after the delay-slot filler has run, consider the
+;; full insn size.  This is required for computing a correct loop body
+;; size.
+(define_attr "delay_slot_filled" "no,yes"
+ (cond [(match_test "!crtl->dbr_scheduled_p")
+ (const_string "yes")
+ (match_test "NEXT_INSN (PREV_INSN (insn)) == insn")
+ (const_string "no")
+ (match_test "JUMP_P (insn)
+ && INSN_ANNULLED_BRANCH_P (insn)
+ && !INSN_FROM_TARGET_P (NEXT_INSN (insn))")
+ (const_string "no")]
+ (const_string "yes")))
+
+(define_attr "delay_slot_length" ""
+ (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn")
+ (const_int 0)]
+ (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn)))
+ - get_attr_length (insn)")))
+
+;; -------------------------------------------------------------------
+;; Pipeline descriptions and scheduling
+;; -------------------------------------------------------------------
+
+(include "hs6x.md")
+
+;; -------------------------------------------------------------------
+;; Moves
+;; -------------------------------------------------------------------
+
+(define_expand "mov"
+ [(set (match_operand:MV_ALLI 0 "nonimmediate_operand")
+ (match_operand:MV_ALLI 1 "general_operand"))]
+ ""
+ "
+ if (arc64_prepare_move_operands (operands[0], operands[1], <MODE>mode))
+ DONE;
+ "
+ )
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand")
+ (match_operand:TI 1 "general_operand"))]
+ "TARGET_WIDE_LDST"
+ {
+ if (CONSTANT_P (operands[1]))
+ {
+ emit_move_insn (gen_lowpart (DImode, operands[0]),
+ gen_lowpart (DImode, operands[1]));
+ emit_move_insn (gen_highpart (DImode, operands[0]),
+ gen_highpart_mode (DImode, TImode, operands[1]));
+ DONE;
+ }
+ else if (!register_operand (operands[0], TImode)
+ && !register_operand (operands[1], TImode))
+ operands[1] = force_reg (TImode, operands[1]);
+ arc64_prepare_move_operands (operands[0], operands[1], TImode);
+ DONE;
+
+ })
+
+;; We use movsf for both soft and hard float.
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand"))]
+ ""
+ {
+ if (arc64_prepare_move_operands (operands[0], operands[1], SFmode))
+ DONE;
+ })
+
+(define_expand "movhf"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "")
+ (match_operand:HF 1 "general_operand"))]
+ "ARC64_HAS_FPUH"
+ {
+ if (arc64_prepare_move_operands (operands[0], operands[1], HFmode))
+ DONE;
+ })
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand"))]
+ "ARC64_HAS_FPUD"
+ {
+ if (arc64_prepare_move_operands (operands[0], operands[1], DFmode))
+ DONE;
+ })
+
+;; mov<.f> b, c
+;; mov<.f> b, s12
+;; mov_s b, u8
+;; mov_s g, h
+;; mov_s h, s3
+;;
+;; ld a, [b, s9]
+;; ld a, [b, c]
+;; ld a, [limm ]
+;;
+;; ldb_s a, [b, c]
+;; ldb_s c, [b, u5]
+;;
+;; st c , [b , s9]
+;; st limm, [b , s9]
+;; stb_s b , [sp, u7]
+;; stb_s c , [b , u5]
+(define_insn "*arc64_movqi"
+ [(set
+ (match_operand:QI 0 "arc64_dest_operand" "=qh, q, r, q,Ustms,Ustor,Ucnst, r,Ustor")
+ (match_operand:QI 1 "general_operand" " qhS03MV,U08S0,ri,Uldms, q,S06S0, i, m, r"))
+ ]
+ ; in general, at least one of the operands must be a register
+ "register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode)
+ /* this is to match 'stb w6, [limm]' (S06S0 is the w6). */
+ || (satisfies_constraint_S06S0 (operands[1])
+ && memory_operand (operands[0], QImode))
+ /* writing a byte into memory using limm variant. */
+ || (immediate_operand (operands[1], QImode)
+ && memory_operand (operands[0], QImode))"
+ "@
+ mov_s\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ldb_s\\t%0,%1
+ stb_s\\t%1,%0
+ stb%U0\\t%1,%0
+ stb%U0\\t%1,%0
+ ldb%U1\\t%0,%1
+ stb%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,ld,st,st,st,ld,st")
+ (set_attr "length" "2,2,4,2,2,*,8,*,*")]
+)
+
+(define_insn "*arc64_movhi"
+ [(set
+ (match_operand:HI 0 "arc64_dest_operand" "=qh,r, q, r,h,r, q,Ustms,Ustw6,Ucnst, r,Ustor")
+ (match_operand:HI 1 "general_operand" "qhS03MV,r,U08S0,S12S0,i,i,Uldms, q,S06S0, i, m, r"))
+ ]
+ "register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode)
+ || (satisfies_constraint_S06S0 (operands[1])
+ && memory_operand (operands[0], HImode))
+ || (CONST_INT_P (operands[1])
+ && satisfies_constraint_Ucnst (operands[0]))"
+ "@
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ldh_s\\t%0,%1
+ sth_s\\t%1,%0
+ sth%U0\\t%1,%0
+ sth%U0\\t%1,%0
+ ldh%U1\\t%0,%1
+ sth%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,move,move,ld,st,st,st,ld,st")
+ (set_attr "length" "2,4,2,4,6,8,2,2,*,8,*,*")]
+)
+
+(define_insn "*arc64_movsi"
+ [(set
+ (match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst, r,Ustor")
+ (match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, m, r"))
+ ]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)
+ || (satisfies_constraint_S06S0 (operands[1])
+ && memory_operand (operands[0], SImode))
+ || (CONST_INT_P (operands[1])
+ && satisfies_constraint_Ucnst (operands[0]))"
+ "@
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ add\\t%0,pcl,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ld_s\\t%0,%1
+ st_s\\t%1,%0
+ st%U0\\t%1,%0
+ st%U0\\t%1,%0
+ ld%U1\\t%0,%1
+ st%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,st")
+ (set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,*,*")]
+)
+
+(define_insn "*mov_cmp0"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (match_operand:ALLI 1 "nonmemory_operand" "S12S0r,S32S0")
+ (const_int 0)))
+ (set (match_operand:ALLI 0 "register_operand" "=r,r") (match_dup 1))]
+ ""
+ "mov.f\\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,8")])
+
+;; Soft-float SFmode move (core registers only).
+(define_insn "*movsf_softfp"
+ [(set (match_operand:SF 0 "arc64_dest_operand" "=qh,r,qh,r, q,Ustms,r,Ustor")
+ (match_operand:SF 1 "general_operand" "qhZ,r, E,E,Uldms, q,m,r"))
+ ]
+ "!ARC64_HAS_FP_BASE
+ && (register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+ "@
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ld_s\\t%0,%1
+ st_s\\t%1,%0
+ ld%U1\\t%0,%1
+ st%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,ld,st,ld,st")
+ (set_attr "length" "2,4,6,8,2,2,*,*")])
+
+;; For an fp move we use the FSMOV instruction.  However, we can also
+;; use FSSGNJ.
+;; FIXME! add short instruction selection
+(define_insn "*mov_hardfp"
+ [(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor")
+ (match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))]
+ "ARC64_HAS_FP_BASE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ "@
+ fmov\\t%0,%1
+ fld%U1\\t%0,%1
+ fst%U0\\t%1,%0
+ fmv2\\t%0,%1
+ fmv2\\t%0,%1
+ mov\\t%0,%1
+ mov\\t%0,%1
+ ld%U1\\t%0,%1
+ st%U0\\t%1,%0"
+ [(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st")
+ (set_attr "length" "4,*,*,4,4,4,8,*,*")])
+
+;; 128-bit moves.
+(define_insn_and_split "*arc64_movti"
+ [(set (match_operand:TI 0 "arc64_dest_operand" "=r,r,Ustor")
+ (match_operand:TI 1 "nonimmediate_operand" "r,m,r"))]
+ "TARGET_WIDE_LDST
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+ "@
+ #
+ lddl%U1\\t%0,%1
+ stdl%U0\\t%1,%0"
+ "&& reload_completed
+ && arc64_split_double_move_p (operands, TImode)"
+ [(const_int 0)]
+ {
+ arc64_split_double_move (operands, TImode);
+ DONE;
+ }
+ [(set_attr "type" "move,ld,st")
+ (set_attr "length" "8,*,*")])
+;;
+;; Short insns: movl_s g,h; movl_s b,u8
+;; Long insns: movl, stl, ldl
+;;
+(define_insn "*arc64_movdi"
+ [(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q,r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
+ (match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,r,S12S0,S32S0SymMV,SyPic,S32S0,Ustk>,m, r, r"))]
+ "TARGET_64BIT
+ && (register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode)
+ || (CONST_INT_P (operands[1])
+ && satisfies_constraint_Ucnst (operands[0])))"
+ "@
+ movl_s\\t%0,%1
+ movl_s\\t%0,%1
+ movl\\t%0,%1
+ movl\\t%0,%1
+ movl\\t%0,%1
+ addl\\t%0,pcl,%1
+ stl%U0\\t%1,%0
+ popl_s\\t%0
+ ldl%U1\\t%0,%1
+ pushl_s\\t%1
+ stl%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,move,addl,st,ld,ld,st,st")
+ (set_attr "length" "2,2,4,4,8,8,8,2,*,2,*")]
+)
+
+;; Hi/Low moves for constant and symbol loading.
+
+(define_insn "*movdi_high"
+ [(set (match_operand:DI 0 "register_operand" "= r, qh, r,r")
+ (high:DI
+ (match_operand:DI 1 "arc64_immediate_or_pic" "S12S0,SymIm,SymIm,SyPic")))]
+ ""
+ "@
+ movhl\\t%0,%H1
+ movhl_s\\t%0,%H1
+ movhl\\t%0,%H1
+ addhl\\t%0,pcl,%H1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,6,8,8")])
+
+;; The immediates are already trimmed to fit the 32-bit limm field.
+(define_insn "*movh_shift"
+ [(set (match_operand:DI 0 "register_operand" "= r, qh, r")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "rS12S0,S32S0,S32S0")
+ (const_int 32)))]
+ ""
+ "@
+ movhl\\t%0,%1
+ movhl_s\\t%0,%1
+ movhl\\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,6,8")])
+
+;; N.B. All immediates need to be unsigned so that they end up in at most u32.
+(define_insn "*movdi_lo_sum_iori"
+ [(set (match_operand:DI 0 "register_operand" "=q, r, h, r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "0, 0, 0, r")
+ (match_operand:DI 2 "immediate_operand" "q,U10S0,SymIm,SymIm")))]
+ ""
+ "@
+ orl%?\\t%0,%1,%2
+ orl%?\\t%0,%1,%L2
+ orl%?\\t%0,%1,%L2
+ orl%?\\t%0,%1,%L2"
+ [(set_attr "type" "or")
+ (set_attr "iscompact" "yes,no,yes,no")
+ (set_attr "length" "2,4,6,8")])
+
+(define_insn "*adddi_high"
+ [(set (match_operand:DI 0 "register_operand" "= qh, r, r,r, r")
+ (plus:DI (match_operand:DI 1 "register_operand" " 0, 0, r,r, r")
+ (high:DI
+ (match_operand:DI 2 "nonmemory_operand" "S32S0,S12S0,U06S0,r,S32S0"))))]
+ ""
+ "@
+ addhl_s\\t%0,%1,%2
+ addhl\\t%0,%1,%2
+ addhl\\t%0,%1,%2
+ addhl\\t%0,%1,%2
+ addhl\\t%0,%1,%2"
+ [(set_attr "type" "addhl")
+ (set_attr "iscompact" "yes,no,no,no,no")
+ (set_attr "length" "6,4,4,4,8")])
+
+; conditional execution patterns
+(define_insn "*mov_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (set (match_operand:ALLI 0 "register_operand" "= r,r")
+ (match_operand:ALLI 1 "nonmemory_operand" "rU06S0,S32S0")))]
+ ""
+ "mov.%m3\\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,8")])
+
+(define_insn "*mov_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (set (match_operand:GPF_HF 0 "register_operand" "=w,*r,*r")
+ (match_operand:GPF_HF 1 "nonmemory_operand" "w,*r,*E")))]
+ ""
+ "@
+ fmov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1"
+ [(set_attr "type" "fmov,move,move")
+ (set_attr "length" "4,4,8")])
+
+;; 0 is dst
+;; 1 is src
+;; 2 is size of copy in bytes
+;; 3 is alignment
+
+(define_expand "cpymem"
+ [(match_operand:BLK 0 "memory_operand")
+ (match_operand:BLK 1 "memory_operand")
+ (match_operand:P 2 "immediate_operand")
+ (match_operand:P 3 "immediate_operand")]
+ "!STRICT_ALIGNMENT"
+{
+ if (arc64_expand_cpymem (operands))
+ DONE;
+ FAIL;
+}
+)
+
+;; -------------------------------------------------------------------
+;; Subroutine calls and sibcalls
+;; -------------------------------------------------------------------
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "memory_operand")
+ (match_operand 1 "general_operand"))
+ (use (match_operand 2 "" ""))
+ (clobber (reg BLINK_REGNUM))])]
+ ""
+ {
+ arc64_expand_call (NULL_RTX, operands[0], false);
+ DONE;
+ }
+)
+
+(define_insn "*call_insn"
+ [(call (mem:P (match_operand:P 0 "arc64_call_insn_operand" "q,r,BLsym,S12S0,S32S0"))
+ (match_operand 1 "" ""))
+ (clobber (reg:P BLINK_REGNUM))]
+ ""
+ "@
+ jl_s%*\\t[%0]
+ jl%*\\t[%0]
+ bl%P0%*\\t%C0
+ jl%*\\t%0
+ jl%*\\t%0"
+ [(set_attr "type" "jl,jl,bl,jl,jl")
+ (set_attr "length" "2,4,*,4,8")])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand")
+ (match_operand 2 "general_operand")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg BLINK_REGNUM))])]
+ ""
+ "
+ {
+ arc64_expand_call (operands[0], operands[1], false);
+ DONE;
+ }"
+)
+
+(define_insn "*call_value_insn"
+ [(set (match_operand 0 "" "")
+ (call (mem:P (match_operand:P 1 "arc64_call_insn_operand"
+ "q,r,BLsym,S12S0,S32S0"))
+ (match_operand 2 "" "")))
+ (clobber (reg:P BLINK_REGNUM))]
+ ""
+ "@
+ jl_s%*\\t[%1]
+ jl%*\\t[%1]
+ bl%P1%*\\t%C1
+ jl%*\\t%1
+ jl%*\\t%1"
+ [(set_attr "type" "jl,jl,bl,jl,jl")
+ (set_attr "length" "2,4,*,4,8")])
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "memory_operand")
+ (match_operand 1 "general_operand"))
+ (return)
+ (use (match_operand 2 "" ""))])]
+ ""
+ {
+ arc64_expand_call (NULL_RTX, operands[0], true);
+ DONE;
+ }
+ )
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand")
+ (match_operand 2 "general_operand")))
+ (return)
+ (use (match_operand 3 "" ""))])]
+ ""
+ {
+ arc64_expand_call (operands[0], operands[1], true);
+ DONE;
+ }
+)
+
+;FIXME! add short variant for jump
+(define_insn "*sibcall_insn"
+ [(call
+ (mem:P
+ (match_operand:P 0 "arc64_call_insn_operand" "Sbreg,BLsym,S12S0,S32S0"))
+ (match_operand 1 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "@
+ j%*\\t[%0]
+ b%*\\t%C0
+ j%*\\t%0
+ j%*\\t%0"
+ [(set_attr "type" "jump,branch,jump,jump")
+ (set_attr "length" "4,4,4,8")]
+)
+
+;FIXME! add short variant for jump
+(define_insn "*sibcall_value_insn"
+ [(set (match_operand 0 "" "")
+ (call
+ (mem:P
+ (match_operand:P 1 "arc64_call_insn_operand" "Sbreg,BLsym,S12S0,S32S0"))
+ (match_operand 2 "" "")))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "@
+ j%*\\t[%1]
+ b%*\\t%C1
+ j%*\\t%1
+ j%*\\t%1"
+ [(set_attr "type" "jump,branch,jump,jump")
+ (set_attr "length" "4,4,4,8")]
+)
+
+; conditional execution patterns
+(define_insn "*call_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (parallel
+ [(call (mem:P
+ (match_operand:P 0 "arc64_call_insn_operand" "r,BLsym,U06S0"))
+ (match_operand 1 "" ""))
+ (clobber (reg:P BLINK_REGNUM))]))]
+ "(arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[0], Pmode)"
+ "@
+ jl%m3%*\\t[%0]
+ bl%m3%*\\t%C0
+ jl%m3%*\\t%0"
+ [(set_attr "type" "jl,bl,jl")
+ (set_attr "length" "4")])
+
+(define_insn "*callv_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:P (match_operand:P 1 "arc64_call_insn_operand"
+ "r,BLsym,U06S0"))
+ (match_operand 2 "" "")))
+ (clobber (reg:P BLINK_REGNUM))]))]
+ "(arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[1], Pmode)"
+ "@
+ jl%m3%*\\t[%1]
+ bl%m3%*\\t%C1
+ jl%m3%*\\t%1"
+ [(set_attr "type" "jl,bl,jl")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_insn_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (parallel
+ [(call (mem:P
+ (match_operand:P 0 "arc64_call_insn_operand" "Sbreg,BLsym,U06S0"))
+ (match_operand 1 "" ""))
+ (return)]))]
+ "SIBLING_CALL_P (insn)
+ && ((arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[0], Pmode))"
+ "@
+ j%m3%*\\t[%0]
+ b%m3%*\\t%C0
+ j%m3%*\\t%0"
+ [(set_attr "type" "jump,branch,jump")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_insn_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (parallel
+ [(set (match_operand 0 "" "")
+ (call
+ (mem:P
+ (match_operand:P 1 "arc64_call_insn_operand" "Sbreg,BLsym,U06S0"))
+ (match_operand 2 "" "")))
+ (return)]))]
+ "SIBLING_CALL_P (insn)
+ && ((arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[1], Pmode))"
+ "@
+ j%m3%*\\t[%1]
+ b%m3%*\\t%C1
+ j%m3%*\\t%1"
+ [(set_attr "type" "jump,branch,jump")
+ (set_attr "length" "4")])
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "")
+ (const_int 0))
+ (match_operand 1 "")
+ (match_operand 2 "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ emit_insn (gen_blockage ());
+ DONE;
+})
+
+;; -------------------------------------------------------------------
+;; Jumps and other miscellaneous insns
+;; -------------------------------------------------------------------
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand"))]
+ ""
+{
+ operands[0] = force_reg (Pmode, operands[0]);
+ if (Pmode == SImode)
+ emit_jump_insn (gen_indirect_jumpsi (operands[0]));
+ else
+ emit_jump_insn (gen_indirect_jumpdi (operands[0]));
+ DONE;
+})
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:P 0 "register_operand" "q,r"))]
+ ""
+ "j%?%*\\t[%0]"
+ [(set_attr "type" "jump")
+ (set_attr "length" "2,4")]
+)
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "b%?%*\\t%l0"
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -512))
+ (le (minus (match_dup 0) (pc)) (const_int 506))
+ (match_test "!CROSSING_JUMP_P (insn)")
+ (eq_attr "delay_slot_filled" "no"))
+ (const_int 2)
+ (const_int 4)))]
+)
+
+(define_expand "cbranch4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arc64_comparison_operator"
+ [(match_operand:GPI 1 "nonmemory_operand")
+ (match_operand:GPI 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+ operands[1] = arc64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[2] = const0_rtx;
+ "
+ )
+
+(define_expand "cbranch4"
+ [(set (pc) (if_then_else (match_operator 0 "arc64_comparison_operator"
+ [(match_operand:GPF_HF 1 "register_operand")
+ (match_operand:GPF_HF 2 "register_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "ARC64_HAS_FP_BASE"
+ "
+ operands[1] = arc64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[2] = const0_rtx;
+ "
+)
+
+(define_expand "cbranchcc4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arc64_comparison_operator"
+ [(match_operand 1 "cc_register")
+ (match_operand 2 "const0_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_insn "condjump"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arc64_comparison_operator"
+ [(match_operand 1 "cc_register" "")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "b%m0%?%*\\t%l2"
+ [(set_attr "type" "branchcc")
+ (set (attr "length")
+ (cond
+ [(eq_attr "delay_slot_filled" "yes")
+ (const_int 4)
+
+ (and (match_operand 0 "equality_comparison_operator" "")
+ (and (ge (minus (match_dup 2) (pc)) (const_int -512))
+ (le (minus (match_dup 2) (pc)) (const_int 506))))
+ (const_int 2)
+
+ (and (match_operand 0 "ccmode_comparison_operator" "")
+ (and (ge (minus (match_dup 2) (pc)) (const_int -60))
+ (le (minus (match_dup 2) (pc)) (const_int 58))))
+ (const_int 2)]
+ (const_int 4)))])
+
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ ""
+ "
+ arc64_expand_prologue ();
+ DONE;
+ "
+)
+
+(define_expand "epilogue"
+ [(clobber (const_int 0))]
+ ""
+ "
+ arc64_expand_epilogue (false);
+ DONE;
+ "
+)
+
+(define_expand "sibcall_epilogue"
+ [(clobber (const_int 0))]
+ ""
+ "
+ arc64_expand_epilogue (true);
+ DONE;
+ "
+)
+
+(define_expand "return"
+ [(simple_return)]
+ "arc64_can_use_return_insn_p ()"
+ "")
+
+(define_insn "simple_return"
+ [(simple_return)]
+ ""
+ {
+ return arc64_output_return ();
+ }
+ [(set_attr "type" "return")
+ (set_attr "length" "2")])
+
+(define_insn "trap_s"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "U06S0")]
+ ARC64_VUNSPEC_TRAP_S)]
+ ""
+ "trap_s\\t%0"
+ [(set_attr "length" "2")
+ (set_attr "type" "trap")])
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "trap_s\\t5"
+ [(set_attr "length" "2")
+ (set_attr "type" "trap")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop_s"
+ [(set_attr "type" "nop")
+ (set_attr "length" "2")])
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] ARC64_VUNSPEC_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "block")]
+ )
+
+(define_insn "rtie"
+ [(return)
+ (unspec_volatile [(const_int 0)] ARC64_VUNSPEC_RTIE)]
+ ""
+ "rtie"
+ [(set_attr "length" "4")
+ (set_attr "type" "rtie")]
+ )
+
+;; Don't need initialization instructions.
+(define_expand "doloop_begin"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" ""))] ; doloop_end pattern
+ ""
+ {
+ FAIL;
+ }
+)
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" ""))] ; label at the top of the loop
+ ""
+ {
+ machine_mode mode = GET_MODE (operands[0]);
+ if (mode != Pmode)
+ FAIL;
+
+ operands[0] = force_reg (Pmode, operands[0]);
+
+ if (mode == SImode)
+ emit_jump_insn (gen_dbnzsi (operands[0], operands[1]));
+ else
+ emit_jump_insn (gen_dbnzdi (operands[0], operands[1]));
+ DONE;
+ })
+
+(define_insn_and_split "dbnz<mode>"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:P 0 "arc64_dest_operand" "+r,!Ustor")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:P (match_dup 0)
+ (const_int -1)))
+ (clobber (match_scratch:P 2 "=X,r"))]
+ ""
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ return \"#\";
+
+ case 0:
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ /* This is the normal case. */
+ return \"dbnz%*\\t%0,%l1\";
+
+ case 8:
+ /* The dbnz is too short, use sub.f/bne instructions. */
+ return \"sub.f\\t%0,%0,1\\n\\tbne%*\\t%l1\";
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+}"
+ "reload_completed && memory_operand (operands[0], Pmode)"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (plus:P (match_dup 2) (const_int -1))
+ (const_int 0)))
+ (set (match_dup 2) (plus:P (match_dup 2) (const_int -1)))])
+ (set (match_dup 0) (match_dup 2))
+ (set (pc) (if_then_else (ne (reg:CC_ZN CC_REGNUM)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ ""
+ [(set_attr "type" "dbnz")
+ (set (attr "length")
+ (cond [(eq_attr "alternative" "1")
+ (const_int 20)
+ (and (eq_attr "alternative" "0")
+ (ge (minus (match_dup 1) (pc)) (const_int -4092))
+ (le (minus (match_dup 1) (pc))
+ (minus (const_int 4094)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)]
+ (const_int 8)))])
+
+; conditional execution
+(define_insn "*returnt_ce"
+ [(set (pc)
+ (if_then_else (match_operator 0 "arc64_comparison_operator"
+ [(reg CC_REGNUM) (const_int 0)])
+ (simple_return) (pc)))]
+ ""
+ "j%m0%*\\t[blink]"
+ [(set_attr "type" "return")
+ (set_attr "length" "4")])
+
+; Jump tables
+(define_expand "casesi"
+ [(match_operand:SI 0 "register_operand" "") ; Index
+ (match_operand:SI 1 "const_int_operand" "") ; Lower bound
+ (match_operand:SI 2 "const_int_operand" "") ; Total range
+ (match_operand 3 "" "") ; Table label
+ (match_operand 4 "" "")] ; Out of range label
+ ""
+ {
+ arc64_expand_casesi (operands);
+ DONE;
+ })
+
+(define_insn "casesi_dispatch"
+ [(set (pc)
+ (unspec:DI [(match_operand:SI 0 "register_operand" "r,q,r")
+ (label_ref (match_operand 1 "" ""))
+ (const_int 0)]
+ ARC64_UNSPEC_CASESI))]
+ ""
+ "@
+ bi\\t[%0]
+ j_s%*\\t[%0]
+ j%*\\t[%0]"
+ [(set_attr "type" "bi,jump,jump")
+ (set_attr "length" "4,2,4")
+ (set_attr "cpu_facility" "cd,ncd,ncd")])
+
+(define_insn "casesi_addaddr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (label_ref (match_operand 2 "" ""))
+ (const_int 1)]
+ ARC64_UNSPEC_CASESI))]
+ ""
+ "add2\\t%0,%l2,%1"
+ [(set_attr "type" "add")
+ (set_attr "length" "8")])
+
+(define_insn "casesi_addaddrdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (const_int 2)]
+ ARC64_UNSPEC_CASESI))]
+ ""
+ "add2l\\t%0,%2,%1"
+ [(set_attr "type" "addl")
+ (set_attr "length" "4")])
+
+(define_insn "casesi_dispatchdi"
+ [(set (pc) (match_operand:DI 0 "register_operand" "q,r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "j%?%*\\t[%0]"
+ [(set_attr "type" "jump")
+ (set_attr "length" "2,4")])
+
+;; Combiner patterns used to match bbit0/1 instructions.
+;; Unfortunately, we cannot use splitting for this pattern as the
+;; insn length is known only very late in the compilation process.
+(define_insn "*bbit_and"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(and:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (match_operand 2 "bbitimm_operand" ""))
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC_ZN CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && (TARGET_BBIT || reload_completed)"
+ {
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ return (GET_CODE (operands[3]) == EQ
+ ? \"bbit0%*\\t%1,%2,%l0\" : \"bbit1%*\\t%1,%2,%l0\");
+ default:
+ return \"btst\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "bbit")
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (const_int 8)))])
+
+;; BBITx instructions need to be generated as late as possible.
+;; Hence, we need to postpone this until the 2nd peephole2 step.
+;; However, this may need an upstream change.
+
+;;(define_peephole2
+;; [(set (match_operand 0 "cc_register")
+;; (compare:CC_ZN (and:GPI (match_operand:GPI 1 "register_operand" "")
+;; (match_operand 2 "bbitimm_operand" ""))
+;; (const_int 0)))
+;; (set (pc) (if_then_else
+;; (match_operator 3 "equality_comparison_operator"
+;; [(match_dup 0) (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))]
+;; "(peephole2_instance == 1) && peep2_reg_dead_p (2, operands[0])"
+;; [(parallel
+;; [(set (pc)
+;; (if_then_else
+;; (match_op_dup 3 [(and:GPI (match_dup 1) (match_dup 2))
+;; (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))
+;; (clobber (reg:CC_ZN CC_REGNUM))])])
+
+(define_insn "*bbit_zext"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(zero_extract:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:GPI 2 "nonmemory_operand" "ir"))
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC_ZN CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && (TARGET_BBIT || reload_completed)"
+ {
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ return (GET_CODE (operands[3]) == EQ
+ ? \"bbit0%*\\t%1,%2,%l0\" : \"bbit1%*\\t%1,%2,%l0\");
+ default:
+ return \"btst\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "bbit")
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (const_int 8)))])
+
+;;(define_peephole2
+;; [(set (match_operand 0 "cc_register")
+;; (compare:CC_ZN (zero_extract:GPI
+;; (match_operand:GPI 1 "register_operand" "")
+;; (const_int 1)
+;; (match_operand:GPI 2 "nonmemory_operand" ""))
+;; (const_int 0)))
+;; (set (pc) (if_then_else
+;; (match_operator 3 "equality_comparison_operator"
+;; [(match_dup 0) (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))]
+;; "(peephole2_instance == 1) && peep2_reg_dead_p (2, operands[0])"
+;; [(parallel
+;; [(set (pc)
+;; (if_then_else
+;; (match_op_dup 3 [(zero_extract:GPI
+;; (match_dup 1) (const_int 1) (match_dup 2))
+;; (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))
+;; (clobber (reg:CC_ZN CC_REGNUM))])])
+
+;; Combiner/instruction pattern for BRcc instructions. We consider
+;; all comparisons supported by BRcc except compares with zero. The
+;; positive branch range needs to take into account the limm size and
+;; the pcl rounding. This pattern is guarded by an option as it may
+;; prohibit further optimizations like if-conversion.
+(define_insn "*brcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "brcc_comparison_operator"
+ [(match_operand:GPI 1 "register_operand" "q, r,r")
+ (match_operand:GPI 2 "nonmemory_operand" "U0000,U06S0r,S32S0")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && (TARGET_BRCC || reload_completed)"
+ {
+ switch (get_attr_length (insn))
+ {
+ case 2:
+ return \"br%m3_s\\t%1,%2,%l0\";
+ case 4:
+ case 8:
+ return \"br%m3%*\\t%1,%2,%l0\";
+ default:
+ return \"cmp\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "brcc")
+ (set (attr "length")
+ (cond [(and (match_operand 3 "equality_comparison_operator" "")
+ (ge (minus (match_dup 0) (pc)) (const_int -126))
+ (le (minus (match_dup 0) (pc)) (const_int 122))
+ (eq (symbol_ref "which_alternative") (const_int 0))
+ ;; no delay slot for short version.
+ (eq_attr "delay_slot_filled" "no")
+ (ior (and (match_operand:DI 1 "" "")
+ (match_test "TARGET_64BIT"))
+ (and (match_operand:SI 1 "" "")
+ (match_test "!TARGET_64BIT"))))
+ (const_int 2)
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (ior (eq (symbol_ref "which_alternative") (const_int 0))
+ (eq (symbol_ref "which_alternative") (const_int 1))))
+ (const_int 4)
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (eq_attr "delay_slot_filled" "no")
+ (eq (symbol_ref "which_alternative") (const_int 2)))
+ (const_int 8)
+ ;; This should be variable as well...
+ (eq (symbol_ref "which_alternative") (const_int 1))
+ (const_int 12)]
+ (const_int 12)))
+ ])
+
+;; BRcc is not complete; emulate the missing variants:
+;; brgt rb,rc,label => brlt rc,rb,label
+;; brgt rb,u6,label => brge rb,u6+1,label
+;; brhi rb,rc,label => brlo rc,rb,label
+;; brhi rb,u6,label => brhs rb,u6+1,label
+;; brle rb,rc,label => brge rc,rb,label
+;; brle rb,u6,label => brlt rb,u6+1,label
+;; brls rb,rc,label => brhs rc,rb,label
+;; brls rb,u6,label => brlo rb,u6+1,label
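+;; For example, "brgt r0,7,label" is emitted as "brge r0,8,label";
+;; the U06M1 constraint presumably restricts the immediate so that
+;; the incremented value still fits in the u6 field.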
+(define_insn "*emu_brcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "ebrcc_comparison_operator"
+ [(match_operand:GPI 1 "register_operand" "r,r,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "U06M1,r,n")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && reload_completed"
+ {
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ case 8:
+ if (which_alternative == 0)
+ {
+ return \"br%w3%*\\t%1,%2 + 1,%l0\";
+ }
+ return \"br%W3%*\\t%2,%1,%l0\";
+ default:
+ return \"cmp\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "brcc")
+ (set (attr "length")
+ (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (ior (eq (symbol_ref "which_alternative") (const_int 0))
+ (eq (symbol_ref "which_alternative") (const_int 1))))
+ (const_int 4)
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (eq_attr "delay_slot_filled" "no")
+ (eq (symbol_ref "which_alternative") (const_int 2)))
+ (const_int 8)]
+ (const_int 12)))
+ ])
+
+;; Peephole pattern for matching BRcc instructions.
+(define_peephole2
+ [(set (match_operand 0 "cc_register")
+ (compare:CC (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))
+ (set (pc) (if_then_else
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 4 ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (pc)
+ (if_then_else
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (label_ref (match_dup 4))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])])
+
+;; Similar to the one above, but for comparisons against zero.
+(define_peephole2
+ [(set (match_operand 0 "cc_register")
+ (compare:CC_ZN (match_operand:GPI 1 "register_operand")
+ (const_int 0)))
+ (set (pc) (if_then_else
+ (match_operator 2 "brcc_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (pc)
+ (if_then_else
+ (match_op_dup 2 [(match_dup 1) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])])
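+
+;; Both peepholes merge a compare followed by a conditional branch
+;; into a single compare-and-branch when the CC register dies, e.g.
+;; "cmp r0,r1" + "beq label" becomes "breq r0,r1,label" (illustrative;
+;; the actual output is chosen by the BRcc patterns above).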
+
+;; -------------------------------------------------------------------
+;; Sign/Zero extension
+;; -------------------------------------------------------------------
+
+(define_expand "sidi2"
+ [(set (match_operand:DI 0 "register_operand")
+ (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
+ "TARGET_64BIT"
+)
+
+(define_expand "2"
+ [(set (match_operand:GPI 0 "register_operand")
+ (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
+ ""
+)
+
+;; TODO: Commented this out to fix issues in dejagnu.
+;; NEEDS TO BE VERIFIED LATER ON.
+;; (define_expand "qihi2"
+;; [(set (match_operand:HI 0 "register_operand")
+;; (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
+;; ""
+;; )
+
+(define_insn "*zero_extendsi2"
+ [(set (match_operand:SI 0 "register_operand" "=q,r, q,r")
+ (zero_extend:SI
+ (match_operand:SHORT 1 "nonimmediate_operand" "q,r,Uldms,m")))]
+ ""
+ "@
+ ext_s\\t%0,%1
+ ext\\t%0,%1
+ ld_s\\t%0,%1
+ ld%U1\\t%0,%1"
+ [(set_attr "type" "sex,sex,ld,ld")
+ (set_attr "length" "2,4,2,*")])
+
+(define_insn "*zero_extenddi2"
+ [(set (match_operand:DI 0 "register_operand" "=r, q,r")
+ (zero_extend:DI
+ (match_operand:EXT 1 "nonimmediate_operand" "r,Uldms,m")))]
+ "TARGET_64BIT"
+ "@
+ bmskl\\t%0,%1,
+ ld_s\\t%0,%1
+ ld%U1\\t%0,%1"
+ [(set_attr "type" "and,ld,ld")
+ (set_attr "length" "4,2,*")]
+)
+
+(define_insn "*sign_extenddi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI
+ (match_operand:EXT 1 "nonimmediate_operand" "r,m")))]
+ "((!TARGET_VOLATILE_DI) || (!MEM_VOLATILE_P (operands[1])))
+ && TARGET_64BIT"
+ "@
+ sexl\\t%0,%1
+ ld.x%U1\\t%0,%1"
+ [(set_attr "type" "sex,ld")
+ (set_attr "length" "4,*")])
+
+(define_insn "*sign_extendsi2"
+ [(set (match_operand:SI 0 "register_operand" "=q,r,r")
+ (sign_extend:SI
+ (match_operand:SHORT 1 "nonimmediate_operand" "q,r,m")))]
+ ""
+ "@
+ sex_s\\t%0,%1
+ sex\\t%0,%1
+ ld.x%U1\\t%0,%1"
+ [(set_attr "type" "sex,sex,ld")
+ (set_attr "length" "2,4,8")])
+
+;; -------------------------------------------------------------------
+;; Simple arithmetic
+;; -------------------------------------------------------------------
+
+;; TODO: Allow symbols in LIMM field
+(define_expand "si3"
+ [(set (match_operand:SI 0 "register_operand")
+ (ADDSUB:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!register_operand (operands[1], SImode)
+ && !register_operand (operands[2], SImode))
+ {
+ if (!CONST_INT_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ else
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ })
+
+(define_expand "mul3"
+ [(set (match_operand:GPI 0 "register_operand")
+ (mult:GPI (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!register_operand (operands[2], <MODE>mode)
+ && !satisfies_constraint_S32S0 (operands[2]))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ })
+
+;; The overflow patterns are tested using expensive tests and dg-torture.exp
+(define_expand "addv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_add<mode>3_Vcmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "add3_Vcmp"
+ [(parallel
+ [(set
+ (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (plus:
+ (sign_extend: (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r"))
+ (sign_extend: (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0")))
+ (sign_extend: (plus:GPI (match_dup 1) (match_dup 2)))))
+ (set (match_operand:GPI 0 "register_operand" "= r, r,r, r, r")
+ (plus:GPI (match_dup 1) (match_dup 2)))])]
+ "register_operand (operands[1], mode)
+ || register_operand (operands[2], mode)"
+ "add.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8,8")
+ (set_attr "type" "add")])
+
+(define_expand "uaddv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_add<mode>3_Ccmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
+ DONE;
+ })
+
+(define_expand "subv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_sub<mode>3_Vcmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "sub3_Vcmp"
+ [(set
+ (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (sign_extend:
+ (minus:GPI
+ (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0")))
+ (minus: (sign_extend: (match_dup 1))
+ (sign_extend: (match_dup 2)))))
+ (set (match_operand:GPI 0 "register_operand" "= r, r,r, r, r")
+ (minus:GPI (match_dup 1) (match_dup 2)))]
+ "register_operand (operands[1], mode)
+ || register_operand (operands[2], mode)"
+ "sub.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8,8")
+ (set_attr "type" "sub")])
+
+(define_expand "negv3"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (label_ref (match_operand 2 "" ""))]
+ ""
+ {
+ emit_insn (gen_neg<mode>2_Vcmp (operands[0], operands[1]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
+ DONE;
+ })
+
+(define_insn "negsi2_Vcmp"
+ [(set (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (sign_extend:DI
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))
+ (neg:DI (sign_extend:DI (match_dup 1)))))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_dup 1)))]
+ ""
+ "neg.f\\t%0,%1"
+ [(set_attr "type" "neg")
+ (set_attr "length" "4")])
+
+(define_insn "negdi2_Vcmp"
+ [(set (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (sign_extend:TI
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (neg:TI (sign_extend:TI (match_dup 1)))))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_dup 1)))]
+ ""
+ "rsubl.f\\t%0,%1,0"
+ [(set_attr "type" "neg")
+ (set_attr "length" "4")])
+
+(define_expand "usubv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_sub<mode>3_cmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
+ DONE;
+ })
+
+(define_expand "mulvsi4"
+ [(ANY_EXTEND:DI (match_operand:SI 0 "register_operand"))
+ (ANY_EXTEND:DI (match_operand:SI 1 "register_operand"))
+ (ANY_EXTEND:DI (match_operand:SI 2 "register_operand"))
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_<su_optab>mulsi3_Vcmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "mulsi3_Vcmp"
+ [(parallel
+ [(set
+ (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (mult:DI
+ (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0, r,r, r"))
+ (ANY_EXTEND:DI (match_operand:SI 2 "arc64_nonmem_operand" "S12S0,U06S0,r,S32S0")))
+ (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2)))))
+ (set (match_operand:SI 0 "register_operand" "=r, r,r, r")
+ (mult:SI (match_dup 1) (match_dup 2)))])]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "mpy.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "mpy")])
+
+;; -------------------------------------------------------------------
+;; Comparison insns
+;; -------------------------------------------------------------------
+
+(define_expand "cmp"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:GPI 0 "register_operand" "")
+ (match_operand:GPI 1 "nonmemory_operand" "")))]
+ ""
+ {
+ if (!register_operand (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+ })
+
+(define_insn "*cmp"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:GPI 0 "nonmemory_operand" " q, qh,r, r, r,U06S0,S12S0,S32S0,r")
+ (match_operand:GPI 1 "nonmemory_operand" "qh,S03MV,r,U06S0,S12S0, r, r, r,S32S0")))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ rcmp%?\\t%1,%0
+ rcmp%?\\t%1,%0
+ rcmp%?\\t%1,%0
+ cmp%?\\t%0,%1"
+ [(set_attr "type" "cmp")
+ (set_attr "iscompact" "maybe,maybe,no,no,no,no,no,no,no")
+ (set_attr "predicable" "no,no,yes,yes,no,yes,no,no,no")
+ (set_attr "length" "*,*,4,4,4,4,4,8,8")])
+
+
+(define_insn "*cmp_ce"
+ [(cond_exec
+ (match_operator 2 "arc64_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:GPI 0 "nonmemory_operand" "r, r,U06S0,S32S0,r")
+ (match_operand:GPI 1 "nonmemory_operand" "r,U06S0, r, r,S32S0"))))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ cmp.%m2\\t%0,%1
+ cmp.%m2\\t%0,%1
+ rcmp.%m2\\t%1,%0
+ rcmp.%m2\\t%1,%0
+ cmp.%m2\\t%0,%1"
+ [(set_attr "type" "cmp")
+ (set_attr "length" "4,4,4,8,8")])
+
+(define_insn "*cmp_zn"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (match_operand:GPI 0 "register_operand" "q,r")
+ (const_int 0)))]
+ ""
+ "tst%?\\t%0,%0"
+ [(set_attr "type" "tst")
+ (set_attr "iscompact" "maybe,no")
+ (set_attr "length" "*,4")])
+
+(define_insn "*cmp_znce"
+ [(cond_exec
+ (match_operator 2 "arc64_comparison_operator"
+ [(match_operand 1 "cc_register" "") (const_int 0)])
+ (set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (match_operand:GPI 0 "register_operand" "r")
+ (const_int 0))))]
+ ""
+ "tst.%m2\\t%0,%0"
+ [(set_attr "type" "tst")
+ (set_attr "length" "4")])
+
+(define_insn "fcmp"
+ [(set (reg:CC_FPU CC_REGNUM)
+ (compare:CC_FPU (match_operand:GPF_HF 0 "register_operand" "w")
+ (match_operand:GPF_HF 1 "register_operand" "w")))]
+ "ARC64_HAS_FP_BASE"
+ "fcmp\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fcmp")])
+
+(define_insn "fcmpf"
+ [(set (reg:CC_FPUE CC_REGNUM)
+ (compare:CC_FPUE (match_operand:GPF_HF 0 "register_operand" "w")
+ (match_operand:GPF_HF 1 "register_operand" "w")))]
+ "ARC64_HAS_FP_BASE"
+ "fcmpf\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fcmp")])
+
+;; -------------------------------------------------------------------
+;; Store-flag and conditional select insns
+;; -------------------------------------------------------------------
+
+(define_expand "cstore4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "arc64_comparison_operator"
+ [(match_operand:GPI 2 "nonmemory_operand")
+ (match_operand:GPI 3 "nonmemory_operand")]))]
+ ""
+ {
+ if (!register_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ if (!arc64_nonmem_operand (operands[3], <MODE>mode))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ })
+
+(define_expand "cstore4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "arc64_comparison_operator"
+ [(match_operand:GPF_HF 2 "register_operand")
+ (match_operand:GPF_HF 3 "register_operand")]))]
+ "ARC64_HAS_FP_BASE"
+ "
+ operands[2] = arc64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+ operands[3]);
+ operands[3] = const0_rtx;
+ "
+)
+
+(define_insn_and_split "*scc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "arc64_comparison_operator"
+ [(reg CC_REGNUM) (const_int 0)]))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (cond_exec
+ (match_dup 1)
+ (set (match_dup 0) (const_int 1)))]
+{
+ operands[1]
+ = gen_rtx_fmt_ee (GET_CODE (operands[1]),
+ VOIDmode,
+ XEXP (operands[1], 0), XEXP (operands[1], 1));
+}
+ [(set_attr "type" "movecc")])
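+;; The split above materializes a comparison result without a branch:
+;; the destination is first cleared and then conditionally set to 1
+;; under the original condition, e.g. "mov r0,0" followed by a
+;; predicated "mov.eq r0,1" (illustrative output).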
+
+;; SETcc instructions
+(define_expand "set"
+ [(set (match_operand:SI 0 "register_operand")
+ (ALLCC:SI
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!arc64_nonmem_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ })
+
+(define_insn "*set"
+ [(set (match_operand:SI 0 "register_operand" "=r, r, r,r")
+ (SETCC:SI
+ (match_operand:GPI 1 "register_operand" "r, r, 0,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,n")))]
+ ""
+ "set%?\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "setcc")])
+
+(define_insn "*set_cmp"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:GPI 1 "register_operand" "r, r, 0,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,n")))
+ (set (match_operand:SI 0 "register_operand" "=r, r, r,r")
+ (SETCC:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "set.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "setcc")])
+
+;; Special cases of SETCC
+(define_insn_and_split "*sethi"
+ [(set (match_operand:SI 0 "register_operand" "=r, r,r")
+ (gtu:SI
+ (match_operand:GPI 1 "register_operand" "r, r,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06M1,n")))]
+ ""
+ "setlo%?\\t%0,%2,%1"
+ "reload_completed
+ && CONST_INT_P (operands[2])
+ && satisfies_constraint_U06M1 (operands[2])"
+ [(const_int 0)]
+ "{
+ /* sethi a,b,u6 => seths a,b,u6 + 1. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ emit_insn (gen_setgeu (operands[0], operands[1], operands[2]));
+ DONE;
+ }"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "setcc")])
+
+(define_insn_and_split "*setls"
+ [(set (match_operand:SI 0 "register_operand" "=r, r,r")
+ (leu:SI
+ (match_operand:GPI 1 "register_operand" "r, r,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06M1,n")))]
+ ""
+ "seths%?\\t%0,%2,%1"
+ "reload_completed
+ && satisfies_constraint_U06M1 (operands[2])"
+ [(const_int 0)]
+ "{
+ /* setls a,b,u6 => setlo a,b,u6 + 1. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ emit_insn (gen_setltu (operands[0], operands[1], operands[2]));
+ DONE;
+ }"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "setcc")])
+
+;; MOVCC patterns
+(define_expand "movcc"
+ [(set (match_operand:ALLI 0 "register_operand")
+ (if_then_else:ALLI (match_operand 1 "arc64_comparison_operator")
+ (match_operand:ALLI 2 "register_operand")
+ (match_operand:ALLI 3 "register_operand")))]
+ ""
+ {
+ rtx tmp;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ })
+
+(define_expand "movcc"
+ [(set (match_operand:GPF_HF 0 "register_operand")
+ (if_then_else:GPF_HF (match_operand 1 "arc64_comparison_operator")
+ (match_operand:GPF_HF 2 "register_operand")
+ (match_operand:GPF_HF 3 "register_operand")))]
+ ""
+ {
+ rtx tmp;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ })
+
+(define_insn "*cmov"
+ [(set (match_operand:ALLI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:ALLI
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:ALLI 1 "nonmemory_operand" "0,0,rU06S0,S32S0")
+ (match_operand:ALLI 2 "nonmemory_operand" "rU06S0,S32S0,0,0")
+ ))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ mov.%M3\\t%0,%2
+ mov.%M3\\t%0,%2
+ mov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1"
+ [(set_attr "length" "4,8,4,8")
+ (set_attr "type" "move")])
+
+(define_insn "*cmov"
+ [(set (match_operand:HF_SF 0 "register_operand" "=w,*r,*r,w,*r,*r")
+ (if_then_else:HF_SF
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:HF_SF 1 "nonmemory_operand" "w,*r,*E,0, 0, 0")
+ (match_operand:HF_SF 2 "nonmemory_operand" "0, 0, 0,w,*r,*E")))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ fmov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1
+ fmov.%M3\\t%0,%2
+ mov.%M3\\t%0,%2
+ mov.%M3\\t%0,%2"
+ [(set_attr "length" "4,4,8,4,4,8")
+ (set_attr "type" "fmov,move,move,fmov,move,move")])
+
+(define_insn "*cmovdf"
+ [(set (match_operand:DF 0 "register_operand" "=w,*r,w,*r")
+ (if_then_else:DF
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 1 "register_operand" "w,*r,0, 0")
+ (match_operand:DF 2 "register_operand" "0, 0,w,*r")))]
+ "ARC64_HAS_FPUD"
+ "@
+ fdmov.%m3\\t%0,%1
+ movl.%m3\\t%0,%1
+ fdmov.%M3\\t%0,%2
+ movl.%M3\\t%0,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "fmov,move,fmov,move")])
+
+;; -------------------------------------------------------------------
+;; Logical operations
+;; -------------------------------------------------------------------
+
+(define_expand "3"
+ [(set (match_operand:GPI 0 "register_operand")
+ (LOGIC:GPI (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!arc64_nonmem_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ })
+
+(define_expand "2"
+ [(set (match_operand:GPI 0 "register_operand")
+ (NOT_ABS:GPI (match_operand:GPI 1 "register_operand")))]
+ ""
+ )
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=q,q,r,r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,q,0,r")))]
+ ""
+ "neg%?\\t%0,%1"
+ [(set_attr "type" "neg")
+ (set_attr "iscompact" "maybe,yes,no,no")
+ (set_attr "predicable" "yes,no,yes,no")
+ (set_attr "length" "*,2,4,4")])
+
+(define_insn "*2"
+ [(set (match_operand:GPI 0 "register_operand" "=q,r")
+ (NOT_ABS:GPI (match_operand:GPI 1 "register_operand" "q,r")))]
+ ""
+ "%?\\t%0,%1"
+ [(set_attr "type" "")
+ (set_attr "iscompact" "maybe,no")
+ (set_attr "length" "*,4")])
+
+(define_insn "*3"
+ [(set (match_operand:GPI 0 "register_operand" "=r, r, r,r")
+ (MINMAX:GPI (match_operand:GPI 1 "register_operand" "%0, 0, r,r")
+ (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S12S0,rU06S0,S32S0")))]
+ ""
+ "%?\\t%0,%1,%2"
+ [(set_attr "type" "")
+ (set_attr "length" "4,4,4,8")
+ (set_attr "predicable" "yes,no,no,no")]
+)
+
+;; Zero-extend pattern
+(define_insn "*si_zextend"
+ [(set (match_operand:DI 0 "register_operand" "=q,r")
+ (zero_extend:DI
+ (LOP2EX:SI (match_operand:SI 1 "register_operand" "q,r"))))]
+ "TARGET_64BIT"
+ "%?\\t%0,%1"
+ [(set_attr "type" "")
+ (set_attr "iscompact" "yes,no")
+ (set_attr "length" "*,4")])
+
+(define_insn "*3_zextend"
+ [(set (match_operand:DI 0 "register_operand" "=r, r, r,r")
+ (zero_extend:DI
+ (MINMAX:SI
+ (match_operand:SI 1 "register_operand" "%0, 0, r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rU06S0,S12S0,rU06S0,S32S0"))))]
+ "TARGET_64BIT"
+ "%?\\t%0,%1,%2"
+ [(set_attr "type" "max")
+ (set_attr "length" "4,4,4,8")
+ (set_attr "predicable" "yes,no,no,no")])
+
+;; NEGCC and NOTCC patterns used by ifcvt.
+(define_expand "cc"
+ [(set (match_operand:GPI 0 "register_operand")
+ (if_then_else:GPI (match_operand 1 "arc64_comparison_operator")
+ (NEG_NOT:GPI (match_operand:GPI 2 "register_operand"))
+ (match_operand:GPI 3 "register_operand")))]
+ ""
+ {
+ rtx tmp;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ })
+
+(define_insn "*cneg"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
+ (if_then_else:GPI
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (neg:GPI (match_operand:GPI 1 "register_operand" "0,0,0"))
+ (match_operand:GPI 2 "nonmemory_operand" "0,rU06S0,S32S0")))]
+ ""
+ "@
+ rsub.%m3\\t%0,%1,0
+ rsub.%m3\\t%0,%1,0\\n\\tmov.%M3\\t%0,%2
+ rsub.%m3\\t%0,%1,0\\n\\tmov.%M3\\t%0,%2"
+ [(set_attr "length" "4,8,12")
+ (set_attr "type" "neg")])
+
+(define_insn "*cnot"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
+ (if_then_else:GPI
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (not:GPI (match_operand:GPI 1 "register_operand" "0,0,0"))
+ (match_operand:GPI 2 "register_operand" "0,rU06S0,S32S0")))]
+ ""
+ "@
+ xor.%m3\\t%0,%1,-1
+ xor.%m3\\t%0,%1,-1\\n\\tmov.%M3\\t%0,%2
+ xor.%m3\\t%0,%1,-1\\n\\tmov.%M3\\t%0,%2"
+ [(set_attr "length" "8,12,16")
+ (set_attr "type" "xor")])
+
+;; -------------------------------------------------------------------
+;; Shifts
+;; -------------------------------------------------------------------
+
+;; FIXME! check if we get better code if we use QI for op 2.
+(define_expand "3"
+ [(set (match_operand:GPI 0 "register_operand")
+ (ASHIFT:GPI (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ "")
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "nonmemory_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ "")
+
+(define_insn "*rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r, r, r, r,r")
+ (rotatert:SI (match_operand:SI 1 "nonmemory_operand" "r, r, r, r,i")
+ (match_operand:SI 2 "nonmemory_operand" "U0001,U0008,U0016,rU06S0,r")))]
+ ;; FIXME! This needs the BARREL_SHIFTER option.
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "@
+ ror\\t%0,%1
+ ror8\\t%0,%1
+ swap\\t%0,%1
+ ror\\t%0,%1,%2
+ ror\\t%0,%1,%2"
+ [(set_attr "type" "ror,ror,swap,ror,ror")
+ (set_attr "length" "4,4,4,4,8")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "nonmemory_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ ""
+ "
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 1))
+ {
+ emit_insn (gen_rotl1 (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 8))
+ {
+ emit_insn (gen_rotl8 (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32);
+ else
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2]));
+ operands[2] = reg;
+ }
+ ")
+
+(define_insn "rotl1"
+ [(set (match_operand:SI 0 "register_operand" "= r,r")
+ (rotate:SI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i")
+ (const_int 1)))]
+ ""
+ "rol%?\\t%0,%1"
+ [(set_attr "type" "rol")
+ (set_attr "predicable" "no")
+ (set_attr "length" "4,8")])
+
+(define_insn "rotl8"
+ [(set (match_operand:SI 0 "register_operand" "= r,r")
+ (rotate:SI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i")
+ (const_int 8)))]
+ ""
+ "rol8%?\\t%0,%1"
+ [(set_attr "type" "rol")
+ (set_attr "predicable" "no")
+ (set_attr "length" "4,8")])
+
+
+;; -------------------------------------------------------------------
+;; Bitfields
+;; -------------------------------------------------------------------
+
+(define_expand "extzv"
+ [(set (match_operand:GPI 0 "register_operand" "")
+ (zero_extract:GPI (match_operand:GPI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "")
+
+(define_insn "*extzvsi"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n")))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[3]) & 0x1f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbfu%?\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "iscompact" "no")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "no")])
+
+(define_insn "*zextzvsi"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extract:DI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n")))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[3]) & 0x1f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbfu%?\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "iscompact" "no")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "no")])
+
+;; FIXME! Compute the length based on the input args.
+(define_insn "*extzvdi"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n")))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x3f) << 6)
+ | (INTVAL (operands[3]) & 0x3f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbful%?\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "iscompact" "no")
+ (set_attr "length" "8,8")
+ (set_attr "predicable" "no")])
+
+(define_insn "*extzvsi_cmp0"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN
+ (zero_extract:SI
+ (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[3]) & 0x1f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbfu.f\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "length" "4,8")])
+
+(define_insn "*extzvsi_cmp0_noout"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN
+ (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[1]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[2]) & 0x1f);
+ operands[1] = GEN_INT (assemble_op2);
+ return "xbfu.f\\t0,%0,%1";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "length" "8")])
+
+(define_insn "bswap2"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r")
+ (bswap:GPI
+ (match_operand:GPI 1 "nonmemory_operand" "rU06S0,S32S0")))]
+ ""
+ "swape\\t%0,%1"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "swap")])
+
+;; -------------------------------------------------------------------
+;; Bitscan
+;; -------------------------------------------------------------------
+
+(define_insn "clrsb2"
+ [(set (match_operand:EPI 0 "register_operand" "=r")
+ (clrsb:EPI (match_operand:EPI 1 "register_operand" "r")))]
+ "TARGET_BITSCAN"
+ "norm\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "norm")])
+
+(define_expand "clz2"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")]
+ "TARGET_BITSCAN"
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ unsigned int size = GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1;
+ emit_insn (gen_arc64_fls<mode>2 (tmp, operands[1]));
+ emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (size), tmp));
+ DONE;
+ })
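+;; clz is synthesized from fls, which is assumed here to return the
+;; bit index of the most significant set bit, so
+;; clz (x) = (bits - 1) - fls (x).  E.g. for SImode,
+;; fls (0x00008000) = 15 and clz = 31 - 15 = 16.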
+
+(define_insn "ctz2"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
+ "TARGET_BITSCAN"
+ "ffs\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "ffs")])
+
+(define_insn "arc64_fls2"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")]
+ ARC64_UNSPEC_FLS))]
+ "TARGET_BITSCAN"
+ "fls\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fls")])
+
+;; -------------------------------------------------------------------
+;; Floating-point intrinsics
+;; -------------------------------------------------------------------
+
+(define_insn "round2"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ ARC64_UNSPEC_ROUND))]
+ "ARC64_HAS_FP_BASE"
+ "frnd\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "frnd")])
+
+(define_insn "btrunc2"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ ARC64_UNSPEC_BTRUNC))]
+ "ARC64_HAS_FP_BASE"
+ "frnd_rz\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "frnd")])
+
+;; -------------------------------------------------------------------
+;; Floating-point conversions
+;; -------------------------------------------------------------------
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=w")
+ (float_extend:DF (match_operand:SF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUD"
+ "fs2d\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fs2d")])
+
+(define_insn "extendhfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (float_extend:SF (match_operand:HF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUH"
+ "fh2s\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fh2s")])
+
+(define_expand "extendhfdf2"
+ [(match_operand:DF 0 "register_operand")
+ (match_operand:HF 1 "register_operand")]
+ "ARC64_HAS_FPUS"
+ {
+ rtx tmp = gen_reg_rtx (SFmode);
+ emit_insn (gen_extendhfsf2 (tmp, operands[1]));
+ if (ARC64_HAS_FPUD)
+ emit_insn (gen_extendsfdf2 (operands[0], tmp));
+ else
+ {
+ rtx ret;
+ ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode,
+ "__extendsfdf2"),
+ operands[0], LCT_NORMAL, DFmode,
+ tmp, SFmode);
+ if (ret != operands[0])
+ emit_move_insn (operands[0], ret);
+ }
+ DONE;
+ })
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUD"
+ "fd2s\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fd2s")])
+
+(define_insn "truncsfhf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUH"
+ "fs2h\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fs2h")])
+
+(define_expand "truncdfhf2"
+ [(match_operand:HF 0 "register_operand")
+ (match_operand:DF 1 "register_operand")]
+ "ARC64_HAS_FPUS"
+ {
+ rtx tmp = gen_reg_rtx (SFmode);
+ if (ARC64_HAS_FPUD)
+ emit_insn (gen_truncdfsf2 (tmp, operands[1]));
+ else
+ {
+ rtx ret;
+ ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode,
+ "__truncdfsf2"),
+ tmp, LCT_NORMAL, SFmode,
+ operands[1], DFmode);
+ if (ret != tmp)
+ emit_move_insn (tmp, ret);
+ }
+ emit_insn (gen_truncsfhf2 (operands[0], tmp));
+ DONE;
+ })
+
+;; SI->SF SI->DF DI->SF DI->DF
+;; FINT2S FINT2D FL2S FL2D
+(define_insn "float