();
+ return machine;
+}
+
+static tree
+arc64_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (id < ARC64_BUILTIN_COUNT)
+ return arc_bdesc[id].fndecl;
+
+ return error_mark_node;
+}
+
+/* Transform UP into lowercase and write the result to LO.
+ You must provide enough space for LO. Return LO. */
+
+static char*
+arc64_tolower (char *lo, const char *up)
+{
+ char *lo0 = lo;
+
+ for (; *up; up++, lo++)
+ *lo = TOLOWER (*up);
+
+ *lo = '\0';
+
+ return lo0;
+}
+
+/* Helper for adding the builtins. */
+static void
+arc64_init_builtins (void)
+{
+ tree void_ftype_usint_usint
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree usint_ftype_usint
+ = build_function_type_list (long_unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree void_ftype_void
+ = build_function_type_list (void_type_node, NULL_TREE);
+ tree void_ftype_usint
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree long_ftype_long
+ = build_function_type_list (long_long_integer_type_node,
+ long_long_integer_type_node, NULL_TREE);
+
+ tree void_ftype_long_long
+ = build_function_type_list (void_type_node, long_long_integer_type_node,
+ long_long_integer_type_node, NULL_TREE);
+
+ /* Add the builtins. */
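+ /* For illustration only: assuming builtins.def contained a (hypothetical)
+ entry such as DEF_BUILTIN (NOP, 0, void_ftype_void, CODE_FOR_nopv, 1),
+ the block below would register "__builtin_arc_nop" with the given
+ function type whenever the MASK expression evaluates to true. */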
+#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \
+ { \
+ int id = ARC64_BUILTIN_ ## NAME; \
+ const char *Name = "__builtin_arc_" #NAME; \
+ char *name = (char*) alloca (1 + strlen (Name)); \
+ \
+ gcc_assert (id < ARC64_BUILTIN_COUNT); \
+ if (MASK) \
+ arc_bdesc[id].fndecl \
+ = add_builtin_function (arc64_tolower(name, Name), TYPE, id, \
+ BUILT_IN_MD, NULL, NULL_TREE); \
+ }
+#include "builtins.def"
+#undef DEF_BUILTIN
+}
+
+/* Helper for arc64_expand_builtin; generates a pattern for the given icode
+ and arguments. */
+
+static rtx_insn *
+apply_GEN_FCN (enum insn_code icode, rtx *arg)
+{
+ switch (insn_data[icode].n_generator_args)
+ {
+ case 0:
+ return GEN_FCN (icode) ();
+ case 1:
+ return GEN_FCN (icode) (arg[0]);
+ case 2:
+ return GEN_FCN (icode) (arg[0], arg[1]);
+ case 3:
+ return GEN_FCN (icode) (arg[0], arg[1], arg[2]);
+ case 4:
+ return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3]);
+ case 5:
+ return GEN_FCN (icode) (arg[0], arg[1], arg[2], arg[3], arg[4]);
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+arc64_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int id = DECL_FUNCTION_CODE (fndecl);
+ const struct arc64_builtin_description *d = &arc_bdesc[id];
+ int i, j, n_args = call_expr_nargs (exp);
+ rtx pat = NULL_RTX;
+ rtx xop[5];
+ enum insn_code icode = d->icode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ int nonvoid;
+ tree arg0;
+ rtx op0;
+
+ if (id >= ARC64_BUILTIN_COUNT)
+ internal_error ("bad builtin fcode");
+
+ /* 1st part: Expand special builtins. */
+ switch (id)
+ {
+ case ARC64_BUILTIN_NOP:
+ emit_insn (gen_nopv ());
+ return NULL_RTX;
+
+ case ARC64_BUILTIN_BRK:
+ gcc_assert (icode != 0);
+ emit_insn (GEN_FCN (icode) (const1_rtx));
+ return NULL_RTX;
+
+ case ARC64_BUILTIN_TRAP_S:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ fold (arg0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ gcc_assert (icode != 0);
+ emit_insn (GEN_FCN (icode) (op0));
+ return NULL_RTX;
+ default:
+ break;
+ }
+
+ /* 2nd part: Expand regular builtins. */
+ if (icode == 0)
+ internal_error ("bad builtin fcode");
+
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+ j = 0;
+
+ if (nonvoid)
+ {
+ if (target == NULL_RTX
+ || GET_MODE (target) != tmode
+ || !insn_data[icode].operand[0].predicate (target, tmode))
+ {
+ target = gen_reg_rtx (tmode);
+ }
+ xop[j++] = target;
+ }
+
+ gcc_assert (n_args <= 4);
+ for (i = 0; i < n_args; i++, j++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ machine_mode mode = insn_data[icode].operand[j].mode;
+ rtx op = expand_expr (arg, NULL_RTX, mode, EXPAND_NORMAL);
+ machine_mode opmode = GET_MODE (op);
+
+ if (CONST_INT_P (op))
+ opmode = mode;
+
+ if ((opmode == SImode) && (mode == HImode))
+ {
+ opmode = HImode;
+ op = gen_lowpart (HImode, op);
+ }
+
+ /* In case the insn wants input operands in modes different from
+ the result, abort. */
+ gcc_assert (opmode == mode || opmode == VOIDmode);
+
+ if (!insn_data[icode].operand[i + nonvoid].predicate (op, mode))
+ op = copy_to_mode_reg (mode, op);
+
+ xop[j] = op;
+ }
+
+ pat = apply_GEN_FCN (icode, xop);
+ if (pat == NULL_RTX)
+ return NULL_RTX;
+
+ emit_insn (pat);
+
+ if (nonvoid)
+ return target;
+ else
+ return const0_rtx;
+}
+
+/* A callback for the hw-doloop pass. Called when a loop we have discovered
+ turns out not to be optimizable; we have to split the loop_end pattern into
+ a subtract and a test. */
+
+static void
+hwloop_fail (hwloop_info loop)
+{
+ rtx test;
+ rtx insn;
+
+ if (TARGET_64BIT)
+ emit_insn_before (gen_adddi_cmp0 (loop->iter_reg,
+ loop->iter_reg,
+ constm1_rtx),
+ loop->loop_end);
+ else
+ emit_insn_before (gen_addsi_cmp0 (loop->iter_reg,
+ loop->iter_reg,
+ constm1_rtx),
+ loop->loop_end);
+
+ test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REGNUM), const0_rtx);
+ test = gen_rtx_IF_THEN_ELSE (VOIDmode, test,
+ gen_rtx_LABEL_REF (Pmode, loop->start_label),
+ pc_rtx);
+ insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test),
+ loop->loop_end);
+
+ JUMP_LABEL (insn) = loop->start_label;
+ LABEL_NUSES (loop->start_label)++;
+ delete_insn (loop->loop_end);
+}
+
+/* Optimize LOOP. Here we only check that the loop body has a suitable
+ length. Return true if successful, false if the loop should be
+ marked bad. If we return false, the FAIL function is called. */
+
+static bool
+hwloop_optimize (hwloop_info loop)
+{
+ unsigned int length;
+
+ /* Call shorten_branches to calculate the insn lengths. */
+ shorten_branches (get_insns());
+
+ if (!INSN_ADDRESSES_SET_P ())
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has an unknown length\n", loop->loop_no);
+ return false;
+ }
+
+ length = INSN_ADDRESSES (INSN_UID (loop->loop_end))
+ - INSN_ADDRESSES (INSN_UID (loop->start_label));
+ loop->length = length;
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d with length %d\n", loop->loop_no,
+ loop->length);
+ if (loop->length > MAX_LOOP_LENGTH
+ || loop->length < MIN_LOOP_LENGTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is too long\n", loop->loop_no);
+ return false;
+ }
+ if (loop->length == 0)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is empty\n", loop->loop_no);
+ return false;
+ }
+
+ return true;
+}
+
+/* A callback for the hw-doloop pass. This function examines INSN; if
+ it is a loop_end pattern we recognize, return the reg rtx for the
+ loop counter. Otherwise, return NULL_RTX. */
+
+static rtx
+hwloop_pattern_reg (rtx_insn *insn)
+{
+ rtx reg;
+
+ if (!JUMP_P (insn)
+ || (TARGET_64BIT && (recog_memoized (insn) != CODE_FOR_dbnzdi))
+ || (!TARGET_64BIT && (recog_memoized (insn) != CODE_FOR_dbnzsi)))
+ return NULL_RTX;
+
+ reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
+ if (!REG_P (reg))
+ return NULL_RTX;
+ return reg;
+}
+
+static struct hw_doloop_hooks arc64_doloop_hooks =
+{
+ hwloop_pattern_reg,
+ hwloop_optimize,
+ hwloop_fail
+};
+
+/* Machine specific reorg step. */
+static void
+arc64_reorg (void)
+{
+ compute_bb_for_insn ();
+ df_analyze ();
+ reorg_loops (true, &arc64_doloop_hooks);
+
+ /* Search for MAC instructions and remove the superfluous move from
+ the accumulator to a register. Hence, we try to repair what we do
+ in the madd expanders or in the mac* splits. */
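+ /* Illustrative example (register names made up): a MAC whose only visible
+ result is the accumulator r58, followed by
+ movl rX, r58
+ is rewritten below into a single MAC that writes rX directly, and both
+ original instructions are deleted. */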
+ for (rtx_insn *insn = get_insns (); insn; insn = next_real_insn (insn))
+ {
+ rtx op0, op1, op2, tmp;
+ enum insn_code icode = CODE_FOR_nothing;
+ machine_mode mode = E_VOIDmode;
+
+ if (!INSN_P (insn))
+ continue;
+
+ /* First, find a MAC instruction whose only output is the
+ accumulator. */
+ switch (INSN_CODE (insn))
+ {
+ case CODE_FOR_umachi0:
+ icode = CODE_FOR_umachi;
+ mode = E_SImode;
+ break;
+
+ case CODE_FOR_machi0:
+ icode = CODE_FOR_machi;
+ mode = E_SImode;
+ break;
+
+ case CODE_FOR_umacd0:
+ icode = CODE_FOR_umacd;
+ mode = E_DImode;
+ break;
+
+ case CODE_FOR_macd0:
+ icode = CODE_FOR_macd;
+ mode = E_DImode;
+ break;
+
+ case CODE_FOR_macsi0:
+ icode = CODE_FOR_macsi;
+ mode = E_SImode;
+ break;
+
+ case CODE_FOR_dmach0:
+ icode = CODE_FOR_dmach;
+ mode = E_HImode;
+ break;
+
+ default:
+ continue;
+ }
+
+ gcc_assert (REGNO (SET_DEST (PATTERN (insn))) == R58_REGNUM);
+ rtx_insn *nxt = next_real_insn (insn);
+
+ /* Second, check whether the next instruction is a move. */
+ tmp = PATTERN (nxt);
+ if (GET_CODE (tmp) != SET
+ || (GET_CODE (SET_SRC (tmp)) != REG)
+ || (GET_CODE (SET_DEST (tmp)) != REG))
+ continue;
+
+ op0 = SET_DEST (tmp);
+ op1 = SET_SRC (tmp);
+ if (REGNO (op1) != R58_REGNUM)
+ continue;
+
+ /* Make the new MAC instruction. */
+ switch (INSN_CODE (insn))
+ {
+ case CODE_FOR_umachi0:
+ case CODE_FOR_umacd0:
+ case CODE_FOR_machi0:
+ case CODE_FOR_macd0:
+ if (!TARGET_64BIT && ((REGNO (op0) & 1) != 0))
+ continue;
+ tmp = SET_SRC (PATTERN (insn));
+ op1 = XEXP (XEXP (XEXP (tmp, 0), 0), 0);
+ op2 = XEXP (XEXP (XEXP (tmp, 0), 1), 0);
+ break;
+
+ case CODE_FOR_dmach0:
+ case CODE_FOR_macsi0:
+ tmp = SET_SRC (PATTERN (insn));
+ op1 = XEXP (XEXP (tmp, 0), 0);
+ op2 = XEXP (XEXP (tmp, 0), 1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn_before (GEN_FCN (icode) (op0, op1, op2,
+ gen_rtx_REG (mode, R58_REGNUM)),
+ insn);
+
+ /* Remove the old MAC and MOV instruction. */
+ set_insn_deleted (insn);
+ set_insn_deleted (nxt);
+ }
+}
+
+/* Expand a compare and swap pattern. */
+
+static void
+emit_unlikely_jump (rtx insn)
+{
+ rtx_insn *jump = emit_jump_insn (insn);
+ add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
+}
+
+/* Expand code to perform an 8- or 16-bit compare and swap by doing a
+ 32-bit compare and swap on the word containing the byte or
+ half-word. The difference between a weak and a strong CAS is that
+ the weak version may simply fail. The strong version relies on two
+ loops: one checks whether the SCOND op succeeded, the other checks
+ that the accessed 32-bit location containing the 8- or 16-bit datum
+ was not changed by another thread. The first loop is implemented by
+ the atomic_compare_and_swap{si,di}_1 patterns. The second loop is
+ implemented by this routine. */
+
+static void
+arc_expand_compare_and_swap_qh (rtx bool_result, rtx result, rtx mem,
+ rtx oldval, rtx newval, rtx weak,
+ rtx mod_s, rtx mod_f)
+{
+ rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
+ rtx addr = gen_reg_rtx (Pmode);
+ rtx off = gen_reg_rtx (SImode);
+ rtx oldv = gen_reg_rtx (SImode);
+ rtx newv = gen_reg_rtx (SImode);
+ rtx oldvalue = gen_reg_rtx (SImode);
+ rtx newvalue = gen_reg_rtx (SImode);
+ rtx res = gen_reg_rtx (SImode);
+ rtx resv = gen_reg_rtx (SImode);
+ rtx memsi, val, mask, end_label, loop_label, cc, x;
+ machine_mode mode;
+ bool is_weak = (weak != const0_rtx);
+
+ /* Truncate the address. */
+ emit_insn (gen_rtx_SET (addr,
+ gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
+
+ /* Compute the datum offset. */
+
+ emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode,
+ gen_lowpart(SImode, addr1),
+ GEN_INT (3))));
+
+ /* Normal read from truncated address. */
+ memsi = gen_rtx_MEM (SImode, addr);
+ set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
+
+ val = copy_to_reg (memsi);
+
+ /* Convert the offset to bits. */
+ emit_insn (gen_rtx_SET (off,
+ gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
+
+ /* Get the proper mask. */
+ if (GET_MODE (mem) == QImode)
+ mask = force_reg (SImode, GEN_INT (0xff));
+ else
+ mask = force_reg (SImode, GEN_INT (0xffff));
+
+ emit_insn (gen_rtx_SET (mask,
+ gen_rtx_ASHIFT (SImode, mask, off)));
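+ /* Worked example (illustrative): for a QImode access whose address has
+ low bits 2, off is 2*8 = 16 and mask becomes 0xff << 16, i.e. the
+ byte of interest sits in bits 16..23 of the aligned 32-bit word. */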
+
+ /* Prepare the old and new values. */
+ emit_insn (gen_rtx_SET (val,
+ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+ val)));
+
+ oldval = gen_lowpart (SImode, oldval);
+ emit_insn (gen_rtx_SET (oldv,
+ gen_rtx_ASHIFT (SImode, oldval, off)));
+
+ newval = gen_lowpart_common (SImode, newval);
+ emit_insn (gen_rtx_SET (newv,
+ gen_rtx_ASHIFT (SImode, newval, off)));
+
+ emit_insn (gen_rtx_SET (oldv,
+ gen_rtx_AND (SImode, oldv, mask)));
+
+ emit_insn (gen_rtx_SET (newv,
+ gen_rtx_AND (SImode, newv, mask)));
+
+ if (!is_weak)
+ {
+ end_label = gen_label_rtx ();
+ loop_label = gen_label_rtx ();
+ emit_label (loop_label);
+ }
+
+ /* Make the old and new values. */
+ emit_insn (gen_rtx_SET (oldvalue,
+ gen_rtx_IOR (SImode, oldv, val)));
+
+ emit_insn (gen_rtx_SET (newvalue,
+ gen_rtx_IOR (SImode, newv, val)));
+
+ /* Try a 32-bit atomic compare and swap. It clobbers the CC
+ register. */
+ if (GET_MODE (mem) == SImode)
+ emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue,
+ weak, mod_s, mod_f));
+ else /* DImode */
+ emit_insn (gen_atomic_compare_and_swapdi_1 (res, memsi, oldvalue, newvalue,
+ weak, mod_s, mod_f));
+
+ /* Regardless of the weakness of the operation, a proper boolean
+ result needs to be provided. */
+ x = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_EQ (SImode, x, const0_rtx);
+ emit_insn (gen_rtx_SET (bool_result, x));
+
+ if (!is_weak)
+ {
+ /* Check the result: if the atomic op succeeded, then jump to
+ the end label. */
+ x = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_EQ (VOIDmode, x, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, end_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+
+ /* Wait for the right moment when the accessed 32-bit location
+ is stable. */
+ emit_insn (gen_rtx_SET (resv,
+ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+ res)));
+ mode = SELECT_CC_MODE (NE, resv, val);
+ cc = gen_rtx_REG (mode, CC_REGNUM);
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, resv, val)));
+
+ /* Set the new value of the 32-bit location, properly masked. */
+ emit_insn (gen_rtx_SET (val, resv));
+
+ /* Try again if the location is unstable. Fall through if only
+ the scond op failed. */
+ x = gen_rtx_NE (VOIDmode, cc, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, loop_label), pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+ emit_label (end_label);
+ }
+
+ /* End: properly return the result for the given mode. */
+ emit_insn (gen_rtx_SET (res,
+ gen_rtx_AND (SImode, res, mask)));
+
+ emit_insn (gen_rtx_SET (res,
+ gen_rtx_LSHIFTRT (SImode, res, off)));
+
+ emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
+}
+
+
+/* This hook may conditionally modify five variables: fixed_regs,
+ call_used_regs, global_regs, reg_names and reg_class_contents. */
+
+static void
+arc64_conditional_register_usage (void)
+{
+ int regno;
+
+ /* When floating point is available, allow the FP registers to be used by
+ the compiler and mark the appropriate ones as call-used (i.e., f0-f15). */
+ if (ARC64_HAS_FP_BASE)
+ {
+ for (regno = F0_REGNUM; regno <= F31_REGNUM; regno++)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = (regno < F16_REGNUM) ? 1 : 0;
+ }
+ }
+}
+
+/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
+ if MODE is HFmode, and punt to the generic implementation otherwise. */
+
+static bool
+arc64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
+{
+ return (mode == HFmode
+ ? ARC64_HAS_FPUH
+ : default_libgcc_floating_mode_supported_p (mode));
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
+ if MODE is HFmode, and punt to the generic implementation otherwise. */
+
+static bool
+arc64_scalar_mode_supported_p (scalar_mode mode)
+{
+ return (mode == HFmode
+ ? ARC64_HAS_FPUH
+ : default_scalar_mode_supported_p (mode));
+}
+
+/* Implements target hook vector_mode_supported_p. */
+
+static bool
+arc64_vector_mode_supported_p (machine_mode mode)
+{
+ switch (mode)
+ {
+ /* 32-bit fp SIMD vectors. */
+ case E_V2HFmode:
+ return ARC64_VFP_32;
+ /* 64-bit fp SIMD vectors. */
+ case E_V4HFmode:
+ case E_V2SFmode:
+ return ARC64_VFP_64;
+ /* 128-bit fp SIMD vectors. */
+ case E_V8HFmode:
+ case E_V4SFmode:
+ case E_V2DFmode:
+ return ARC64_VFP_128;
+
+ /* 32-bit SIMD vectors. */
+ case E_V2HImode:
+ /* 64-bit SIMD vectors. */
+ case E_V4HImode:
+ case E_V2SImode:
+ return TARGET_SIMD;
+
+ default:
+ return false;
+ }
+}
+
+/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
+
+static machine_mode
+arc64_preferred_simd_mode (scalar_mode mode)
+{
+ switch (mode)
+ {
+ case E_HFmode:
+ if (ARC64_VFP_128)
+ return V8HFmode;
+ if (ARC64_VFP_64)
+ return V4HFmode;
+ if (ARC64_VFP_32)
+ return V2HFmode;
+ return word_mode;
+
+ case E_SFmode:
+ if (ARC64_VFP_128)
+ return V4SFmode;
+ if (ARC64_VFP_64)
+ return V2SFmode;
+ return word_mode;
+
+ case E_DFmode:
+ if (ARC64_VFP_128)
+ return V2DFmode;
+ return word_mode;
+
+ case E_HImode:
+ return TARGET_SIMD ? V4HImode : word_mode;
+ case E_SImode:
+ return TARGET_SIMD ? V2SImode : word_mode;
+
+ default:
+ return word_mode;
+ }
+}
+
+/* Implements target hook
+ TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
+
+static unsigned int
+arc64_autovectorize_vector_modes (vector_modes *modes, bool)
+{
+ if (ARC64_VFP_128)
+ {
+ modes->quick_push (V8HFmode);
+ modes->quick_push (V4SFmode);
+ modes->quick_push (V2DFmode);
+ }
+ else if (ARC64_VFP_64)
+ {
+ modes->quick_push (V4HFmode);
+ modes->quick_push (V2SFmode);
+ }
+ else if (ARC64_VFP_32)
+ modes->quick_push (V2HFmode);
+
+ if (TARGET_SIMD)
+ {
+ modes->quick_push (V4HImode);
+ modes->quick_push (V2SImode);
+ }
+ return 0;
+}
+
+/* Vectorization costs. */
+static int
+arc64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return 1;
+
+ case scalar_load:
+ return 1;
+
+ case scalar_store:
+ return 1;
+
+ case vector_stmt:
+ return 1; /* fp operations are more efficient than int. */
+
+ case vector_load:
+ return 1;
+
+ case vector_store:
+ return 1;
+
+ case vec_to_scalar:
+ return 1; /* We have extract instructions. */
+
+ case scalar_to_vec:
+ return 1; /* fp is more efficient than int. */
+
+ case unaligned_load:
+ case vector_gather_load:
+ return 1; /* Maybe I need to reflect unaligned flag here. */
+
+ case unaligned_store:
+ case vector_scatter_store:
+ return 1; /* Likewise. */
+
+ case cond_branch_taken:
+ return 3; /* A jump is always expensive. */
+
+ case cond_branch_not_taken:
+ return 1;
+
+ case vec_perm:
+ return 1; /* We don't really have vec_perm. */
+
+ case vec_promote_demote:
+ return 1;
+
+ case vec_construct:
+ elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+ return elements / 2;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return a new RTX holding the result of moving POINTER forward by
+ AMOUNT bytes. */
+
+static rtx
+arc64_move_pointer (rtx pointer, poly_int64 amount)
+{
+ rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
+
+ return adjust_automodify_address (pointer, GET_MODE (pointer),
+ next, amount);
+}
+
+/* Return a new RTX holding the result of moving POINTER forward by the
+ size of the mode it points to. */
+
+static rtx
+arc64_progress_pointer (rtx pointer)
+{
+ return arc64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
+}
+
+/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
+ MODE bytes. */
+
+static void
+arc64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
+ machine_mode mode)
+{
+ rtx reg = gen_reg_rtx (mode);
+
+ /* "Cast" the pointers to the correct mode. */
+ *src = adjust_address (*src, mode, 0);
+ *dst = adjust_address (*dst, mode, 0);
+ /* Emit the memcpy. */
+ emit_move_insn (reg, *src);
+ emit_move_insn (*dst, reg);
+ /* Move the pointers forward. */
+ *src = arc64_progress_pointer (*src);
+ *dst = arc64_progress_pointer (*dst);
+}
+
+/* Moving f regs to r regs is not a very good idea. */
+static int
+arc64_register_move_cost (machine_mode,
+ reg_class_t from_class, reg_class_t to_class)
+{
+ if ((from_class == FP_REGS && to_class == GENERAL_REGS)
+ || (to_class == FP_REGS && from_class == GENERAL_REGS))
+ return 200;
+ return 2;
+}
+
+/* Check/emit vector duplicate instructions. */
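+/* For example (illustrative), a permutation selector of {2, 2, 2, 2} on a
+ V4HI input broadcasts lane 2 into every output lane; the routine below
+ matches such single-lane selectors (with a non-zero lane index). */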
+
+static bool
+arc64_simd_dup (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ HOST_WIDE_INT elt;
+ rtx t0, parallel, select;
+ rtx in0 = d->op0;
+ rtx out = d->target;
+
+ if (!TARGET_64BIT
+ || !d->one_vector_p
+ || vmode == E_V2HImode
+ || d->perm.encoding ().encoded_nelts () != 1
+ || !d->perm[0].is_constant (&elt)
+ /* If elt is zero, the vec_dup pattern does as well as we would here. */
+ || elt == 0)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ switch (vmode)
+ {
+ case E_V8HFmode:
+ case E_V4HFmode:
+ case E_V2HFmode:
+ case E_V2SFmode:
+ case E_V4SFmode:
+ if (elt != 0)
+ {
+ t0 = gen_reg_rtx (GET_MODE_INNER (vmode));
+ parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, GEN_INT (elt)));
+ select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
+ emit_set_insn (t0, select);
+ emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, t0));
+ return true;
+ }
+
+ /* FALLTHRU */
+ case E_V2DFmode:
+ case E_V2SImode:
+ parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, GEN_INT (elt)));
+ select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
+ emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
+ return true;
+
+ case E_V4HImode:
+ if (elt == 0)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t0));
+ return true;
+ }
+ else if (elt == 1)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane3_1v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t0));
+ return true;
+ }
+ else if (elt == 2)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t0));
+ return true;
+ }
+ else if (elt == 3)
+ {
+ t0 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane3_1v4hi (t0, in0, in0));
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t0));
+ return true;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+}
+
+/* Recognize VPACK instructions. */
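+/* For example (illustrative), with two V4HI inputs B and C, a selector of
+ {0, 2, 4, 6} picks the even lanes of both vectors and {1, 3, 5, 7} picks
+ the odd lanes, which map onto the lane-select patterns used below. */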
+
+static bool
+arc64_simd_vpack (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
+ rtx out, in0, in1;
+ machine_mode vmode = d->vmode;
+
+ if (FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd != 0 && odd != 1)
+ || !d->perm.series_p (0, 1, odd, 2)
+ || !d->perm.series_p (2, 1, nelt + odd, 2))
+ return false;
+
+ switch (vmode)
+ {
+ case E_V2SImode:
+ case E_V4HImode:
+ if (!TARGET_64BIT)
+ return false;
+ break;
+
+ case E_V2HImode:
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ in1 = d->op1;
+ out = d->target;
+ switch (vmode)
+ {
+ case E_V4HImode:
+ if (odd)
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, in0, in1));
+ else
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, in0, in1));
+ break;
+
+ case E_V2SImode:
+ if (odd)
+ emit_insn (gen_arc64_sel_lane1_v2si (out, in0, in1));
+ else
+ emit_insn (gen_arc64_sel_lane0_v2si (out, in0, in1));
+ break;
+
+ case E_V2HImode:
+ if (odd)
+ emit_insn (gen_arc64_sel_lane1_v2hi (out, in0, in1));
+ else
+ emit_insn (gen_arc64_sel_lane0_v2hi (out, in0, in1));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return true;
+}
+
+/* Reverse vector, recognize swapl and vfexch instructions. */
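+/* For example (illustrative), reversing a V4HI vector corresponds to the
+ selector {3, 2, 1, 0}; the checks below accept exactly such descending
+ single-input series. */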
+
+static bool
+arc64_simd_swapl (struct e_vec_perm_d *d)
+{
+ poly_uint64 nelt = d->perm.length ();
+ machine_mode vmode = d->vmode;
+ rtx t0, t1, t2, out, in0;
+ rtx src;
+ unsigned int unspec;
+
+ if (GET_MODE_UNIT_SIZE (vmode) > 4
+ || !TARGET_64BIT)
+ return false;
+
+ if (!d->one_vector_p)
+ return false;
+
+ if (!d->perm.series_p (0, 1, nelt - 1, -1))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ out = d->target;
+ t0 = d->target;
+ t1 = d->target;
+
+ switch (vmode)
+ {
+ case E_V4HImode:
+ t0 = gen_reg_rtx (vmode);
+ t1 = gen_reg_rtx (vmode);
+ t2 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_swapl (t0, in0));
+ emit_insn (gen_arc64_swapv4hi (t1, in0));
+ emit_insn (gen_arc64_swapv4hi (t2, t0));
+ emit_insn (gen_arc64_swp_lane0_v4hi (out, t2, t1));
+ break;
+
+ case E_V2SImode:
+ emit_insn (gen_arc64_swaplv2si (out, in0));
+ break;
+
+ case E_V2HImode:
+ emit_insn (gen_arc64_swapv2hi (out, in0));
+ break;
+
+ case E_V8HFmode:
+ t1 = gen_reg_rtx (vmode);
+ /* Fall through. */
+ case E_V4SFmode:
+ t0 = gen_reg_rtx (vmode);
+ /* Fall through. */
+ case E_V2DFmode:
+ unspec = ARC64_UNSPEC_DEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);
+ emit_set_insn (t0, src);
+ if (vmode == E_V2DFmode)
+ return true;
+
+ unspec = ARC64_UNSPEC_SEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, t0), unspec);
+ emit_set_insn (t1, src);
+ if (vmode == E_V4SFmode)
+ return true;
+
+ unspec = ARC64_UNSPEC_HEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, t1), unspec);
+ emit_set_insn (out, src);
+ break;
+
+ case E_V4HFmode:
+ t1 = gen_reg_rtx (vmode);
+ /* Fall through. */
+ case E_V2SFmode:
+ unspec = ARC64_UNSPEC_SEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);
+ emit_set_insn (t1, src);
+ if (vmode == E_V2SFmode)
+ return true;
+ in0 = t1;
+ /* Fall through. */
+
+ case E_V2HFmode:
+ unspec = ARC64_UNSPEC_HEXCH;
+ src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, in0), unspec);
+ emit_set_insn (out, src);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return true;
+}
+
+/* Detect cases when we can use the swap instruction. */
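+/* For example (illustrative), a V4HI selector of {1, 0, 3, 2} swaps the
+ two lanes within each 32-bit pair, which is what the sequence below
+ emits. */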
+
+static bool
+arc64_simd_swap (struct e_vec_perm_d *d)
+{
+ rtx t0, t1, t2, out, in0;
+ machine_mode vmode = d->vmode;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT)
+ return false;
+
+ if (!d->one_vector_p)
+ return false;
+
+ if (!d->perm.series_p (0, 2, 1, 2)
+ || !d->perm.series_p (1, 2, 0, 2))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ in0 = d->op0;
+ out = d->target;
+
+ t0 = gen_reg_rtx (vmode);
+ t1 = gen_reg_rtx (vmode);
+ t2 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_swapl (t0, in0));
+ emit_insn (gen_arc64_swapv4hi (t1, in0));
+ emit_insn (gen_arc64_swapv4hi (t2, t0));
+ emit_insn (gen_arc64_swp_lane0_v4hi (out, t1, t2));
+ return true;
+}
+
+/* Detect cases when we can use vpack2wl for 4-element vectors. */
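+/* For example (illustrative), with V4HI inputs B and C, the selector
+ {0, 1, 4, 5} (or {0, 1, 0, 1} for a single input) keeps the low 32-bit
+ halves of both operands, which is what the pattern below emits. */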
+
+static bool
+arc64_simd_vpack2wl (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT)
+ return false;
+
+ if (d->perm[0] != 0
+ || d->perm[1] != 1
+ || (d->perm[2] != 4 && d->perm[2] != 0)
+ || (d->perm[3] != 5 && d->perm[3] != 1))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ emit_insn (gen_arc64_swp_lane0_v4hi (d->target, d->op0, d->op1));
+ return true;
+}
+
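+/* Likewise for vpack2wm: e.g. (illustrative) the V4HI selector {2, 3, 6, 7}
+ keeps the high 32-bit halves of both operands. */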
+static bool
+arc64_simd_vpack2wm (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT)
+ return false;
+
+ if (d->perm[0] != 2
+ || d->perm[1] != 3
+ || (d->perm[2] != 6 && d->perm[2] != 2)
+ || (d->perm[3] != 7 && d->perm[3] != 3))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ emit_insn (gen_arc64_swp_lane1_v4hi (d->target, d->op0, d->op1));
+ return true;
+}
+
+/* Recognize patterns for {H,S,D}EXCH insns, which reverse elements:
+ VFHEXCH (v2hf): h0 h1
+ VFHEXCH (v4hf): h2 h3 h0 h1
+ VFHEXCH (v8hf): h6 h7 h4 h5 h2 h3 h0 h1
+
+ VFSEXCH (v4hf): h1h0 h3h2
+ VFSEXCH (v8hf): h5h4 h7h6 h1h0 h3h2
+
+ VFDEXCH (v8hf): h3h2h1h0 h7h6h5h4
+
+ VFSEXCH (v2sf): s0 s1
+ VFSEXCH (v4sf): s2 s3 s0 s1
+
+ VFDEXCH (v4sf): s1s0 s3s2
+
+ VFDEXCH (v2df): d0 d1
+ */
+
+static bool
+arc64_simd_exch (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT diff;
+ unsigned int i, size, unspec;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->one_vector_p
+ || !d->perm[0].is_constant (&diff)
+ || !diff)
+ return false;
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = ARC64_UNSPEC_DEXCH;
+ }
+ else if (size == 32)
+ {
+ unspec = ARC64_UNSPEC_SEXCH;
+ }
+ else if (size == 16)
+ {
+ unspec = ARC64_UNSPEC_HEXCH;
+ }
+ else
+ return false;
+
+ switch (diff)
+ {
+ case 1:
+ for (i = 0; i < 2; i++)
+ if (!d->perm.series_p (i, 2, diff - i, 2))
+ return false;
+ break;
+
+ case 2:
+ case 4:
+ for (i = 0; i < diff; i++)
+ if (!d->perm.series_p (i, diff, diff + i, -diff))
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (1, d->op0), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Recognize FVUNPACKL/FVUNPACKM instructions.
+
+ VFHUNPKL (v2hf): Ch0 Bh0
+ VFHUNPKL (v4hf): Ch2 Ch0 Bh2 Bh0
+ VFHUNPKL (v8hf): Ch6 Ch4 Ch2 Ch0 Bh6 Bh4 Bh2 Bh0
+
+ VFSUNPKL (v4hf): Ch1Ch0 Bh1Bh0
+ VFSUNPKL (v8hf): Ch5Ch4 Ch1Ch0 Bh5Bh4 Bh1Bh0
+
+ VFDUNPKL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0
+
+ VFSUNPKL (v2sf): Cs0 Bs0
+ VFSUNPKL (v4sf): Cs2 Cs0 Bs2 Bs0
+
+ VFDUNPKL (v4sf): Cs1Cs0 Bs1Bs0
+
+ VFDUNPKL (v2df): Cd0 Bd0
+
+ VFHUNPKM (v2hf): Ch1 Bh1
+ VFHUNPKM (v4hf): Ch3 Ch1 Bh3 Bh1
+ VFHUNPKM (v8hf): Ch7 Ch5 Ch3 Ch1 Bh7 Bh5 Bh3 Bh1
+
+ VFSUNPKM (v4hf): Ch3Ch2 Bh3Bh2
+ VFSUNPKM (v8hf): Ch7Ch6 Ch3Ch2 Bh7Bh6 Bh3Bh2
+
+ VFDUNPKM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4
+
+ VFSUNPKM (v2sf): Cs1 Bs1
+ VFSUNPKM (v4sf): Cs3 Cs1 Bs3 Bs1
+
+ VFDUNPKM (v4sf): Cs3Cs2 Bs3Bs2
+
+ VFDUNPKM (v2df): Cd1 Bd1
+ */
+
+static bool
+arc64_simd_unpk (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd, lo;
+ poly_uint64 nelt = d->perm.length ();
+ unsigned int i, j, size, unspec, diff = 0;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd == 3)
+ || (odd < 0 || odd > (HOST_WIDE_INT)(nelt >> 1)))
+ return false;
+
+ /* If ODD is set, then diff == odd. Thus, the below condition should
+ hold. */
+ lo = (odd == 0) ? 1 : odd;
+ for (i = 4; (i >= lo) && (diff == 0); i >>= 1)
+ {
+ bool found = true;
+ for (j = 0; (j < i) && found; j++)
+ if (!d->perm.series_p (j, i, odd + j, i * 2 )
+ || !d->perm.series_p ((nelt >> 1) + j, i, nelt + odd + j, i * 2))
+ found = false;
+ if (found)
+ diff = i;
+ }
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = odd ? ARC64_UNSPEC_DUNPKM : ARC64_UNSPEC_DUNPKL;
+ }
+ else if (size == 32)
+ {
+ unspec = odd ? ARC64_UNSPEC_SUNPKM : ARC64_UNSPEC_SUNPKL;
+ }
+ else if (size == 16)
+ {
+ unspec = odd ? ARC64_UNSPEC_HUNPKM : ARC64_UNSPEC_HUNPKL;
+ }
+ else
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Recognize VFPACKL and VFPACKM instructions.
+
+ VFHPACKL (v2hf): Ch0 Bh0
+ VFHPACKL (v4hf): Ch1 Bh1 Ch0 Bh0
+ VFHPACKL (v8hf): Ch3 Bh3 Ch2 Bh2 Ch1 Bh1 Ch0 Bh0
+
+ VFSPACKL (v4hf): Ch1Ch0 Bh1Bh0
+ VFSPACKL (v8hf): Ch3Ch2 Bh3Bh2 Ch1Ch0 Bh1Bh0
+
+ VFDPACKL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0
+
+ VFSPACKL (v2sf): Cs0 Bs0
+ VFSPACKL (v4sf): Cs1 Bs1 Cs0 Bs0
+
+ VFDPACKL (v4sf): Cs1Cs0 Bs1Bs0
+
+ VFDPACKL (v2df): Cd0 Bd0
+
+
+ VFHPACKM (v2hf): Ch1 Bh1
+ VFHPACKM (v4hf): Ch3 Bh3 Ch2 Bh2
+ VFHPACKM (v8hf): Ch7 Bh7 Ch6 Bh6 Ch5 Bh5 Ch4 Bh4
+
+ VFSPACKM (v4hf): Ch3Ch2 Bh3Bh2
+ VFSPACKM (v8hf): Ch7Ch6 Bh7Bh6 Ch5Ch4 Bh5Bh4
+
+ VFDPACKM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4
+
+ VFSPACKM (v2sf): Cs1 Bs1
+ VFSPACKM (v4sf): Cs3 Bs3 Cs2 Bs2
+
+ VFDPACKM (v4sf): Cs3Cs2 Bs3Bs2
+
+ VFDPACKM (v2df): Cd1 Bd1
+ */
+
+static bool
+arc64_simd_pack (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
+ unsigned int i, j, size, unspec, diff = 0;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd != 0 && odd != (HOST_WIDE_INT)(nelt >> 1)))
+ return false;
+
+ for (i = 4; (i > 0) && (diff == 0); i >>= 1)
+ {
+ bool found = true;
+ for (j = 0; (j < i) && found; j++)
+ if (!d->perm.series_p (j, 2 * i, odd + j, i)
+ || !d->perm.series_p (i + j, 2 * i, nelt + odd + j, i))
+ found = false;
+ if (found)
+ diff = i;
+ }
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = odd ? ARC64_UNSPEC_DPACKM : ARC64_UNSPEC_DPACKL;
+ }
+ else if (size == 32)
+ {
+ unspec = odd ? ARC64_UNSPEC_SPACKM : ARC64_UNSPEC_SPACKL;
+ }
+ else if (size == 16)
+ {
+ unspec = odd ? ARC64_UNSPEC_HPACKM : ARC64_UNSPEC_HPACKL;
+ }
+ else
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Recognize VFBFLYL and VFBFLYM instructions.
+
+ VFHBFLYL (v2hf): Ch0 Bh0
+ VFHBFLYL (v4hf): Ch2 Bh2 Ch0 Bh0
+ VFHBFLYL (v8hf): Ch6 Bh6 Ch4 Bh4 Ch2 Bh2 Ch0 Bh0
+
+ VFSBFLYL (v4hf): Ch1Ch0 Bh1Bh0
+ VFSBFLYL (v8hf): Ch5Ch4 Bh5Bh4 Ch1Ch0 Bh1Bh0
+
+ VFDBFLYL (v8hf): Ch3Ch2Ch1Ch0 Bh3Bh2Bh1Bh0
+
+ VFSBFLYL (v2sf): Cs0 Bs0
+ VFSBFLYL (v4sf): Cs2 Bs2 Cs0 Bs0
+
+ VFDBFLYL (v4sf): Cs1Cs0 Bs1Bs0
+
+ VFDBFLYL (v2df): Cd0 Bd0
+
+
+ VFHBFLYM (v2hf): Ch1 Bh1
+ VFHBFLYM (v4hf): Ch3 Bh3 Ch1 Bh1
+ VFHBFLYM (v8hf): Ch7 Bh7 Ch5 Bh5 Ch3 Bh3 Ch1 Bh1
+
+ VFSBFLYM (v4hf): Ch3Ch2 Bh3Bh2
+ VFSBFLYM (v8hf): Ch7Ch6 Bh7Bh6 Ch3Ch2 Bh3Bh2
+
+ VFDBFLYM (v8hf): Ch7Ch6Ch5Ch4 Bh7Bh6Bh5Bh4
+
+ VFSBFLYM (v2sf): Cs1 Bs1
+ VFSBFLYM (v4sf): Cs3 Bs3 Cs1 Bs1
+
+ VFDBFLYM (v4sf): Cs3Cs2 Bs3Bs2
+
+ VFDBFLYM (v2df): Cd1 Bd1
+ */
+
+static bool
+arc64_simd_bfly (struct e_vec_perm_d *d)
+{
+ HOST_WIDE_INT odd;
+ poly_uint64 nelt = d->perm.length ();
+ unsigned int i, j, size, unspec, diff = 0;
+ machine_mode vmode = d->vmode;
+
+ if (!ARC64_HAS_FP_BASE
+ || !FLOAT_MODE_P (vmode)
+ || !d->perm[0].is_constant (&odd)
+ || (odd == 3)
+ || (odd < 0 || odd > (HOST_WIDE_INT)(nelt >> 1)))
+ return false;
+
+ for (i = 4; (i > 0) && (diff == 0); i >>= 1)
+ {
+ bool found = true;
+ for (j = 0; (j < i) && found; j++)
+ if (!d->perm.series_p (j, 2 * i, odd + j, 2 * i)
+ || !d->perm.series_p (i + j, 2 * i, nelt + odd + j, 2 * i))
+ found = false;
+ if (found)
+ diff = i;
+ }
+
+ size = diff * GET_MODE_UNIT_BITSIZE (vmode);
+ if (size == 64)
+ {
+ if (!ARC64_HAS_FPUD)
+ return false;
+ unspec = odd ? ARC64_UNSPEC_DBFLYM : ARC64_UNSPEC_DBFLYL;
+ }
+ else if (size == 32)
+ {
+ unspec = odd ? ARC64_UNSPEC_SBFLYM : ARC64_UNSPEC_SBFLYL;
+ }
+ else if (size == 16)
+ {
+ unspec = odd ? ARC64_UNSPEC_HBFLYM : ARC64_UNSPEC_HBFLYL;
+ }
+ else
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ rtx src = gen_rtx_UNSPEC (vmode, gen_rtvec (2, d->op0, d->op1), unspec);
+ emit_set_insn (d->target, src);
+ return true;
+}
+
+/* Implement combination of vpack4hl/vpack4hm instructions. */
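+/* For example (illustrative), with V4HI inputs B and C, the selector
+ {0, 4, 1, 5} interleaves the two low lanes of each input and {2, 6, 3, 7}
+ interleaves the two high lanes; both are built from the lane-select
+ patterns below. */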
+
+static bool
+arc64_simd_lane_pack (struct e_vec_perm_d *d)
+{
+ machine_mode vmode = d->vmode;
+ HOST_WIDE_INT elem;
+ poly_uint64 nelt = d->perm.length ();
+ rtx t0, t1;
+ rtx in0 = d->op0;
+ rtx in1 = d->op1;
+ rtx out = d->target;
+
+ if (vmode != E_V4HImode
+ || !TARGET_64BIT
+ || !d->perm[0].is_constant (&elem)
+ || (elem != 0 && elem != 2)
+ || !d->perm.series_p (0, 2, elem, 1)
+ || !d->perm.series_p (1, 2, elem + nelt, 1))
+ return false;
+
+ /* Success! */
+ if (d->testing_p)
+ return true;
+
+ t0 = gen_reg_rtx (vmode);
+ t1 = gen_reg_rtx (vmode);
+ emit_insn (gen_arc64_sel_lane2_0v4hi (t0, in0, in1));
+ emit_insn (gen_arc64_sel_lane3_1v4hi (t1, in0, in1));
+ if (elem == 0)
+ emit_insn (gen_arc64_sel_lane2_0v4hi (out, t0, t1));
+ else
+ emit_insn (gen_arc64_sel_lane3_1v4hi (out, t0, t1));
+ return true;
+}
+
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
+
+static bool
+arc64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
+{
+ struct e_vec_perm_d d;
+
+ /* Check whether the mask can be applied to a single vector. */
+ if (sel.ninputs () == 1
+ || (op0 && rtx_equal_p (op0, op1)))
+ d.one_vector_p = true;
+ else if (sel.all_from_input_p (0))
+ {
+ d.one_vector_p = true;
+ op1 = op0;
+ }
+ else if (sel.all_from_input_p (1))
+ {
+ d.one_vector_p = true;
+ op0 = op1;
+ }
+ else
+ d.one_vector_p = false;
+
+ d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
+ sel.nelts_per_input ());
+ d.vmode = vmode;
+ d.target = target;
+ d.op0 = op0 ? force_reg (vmode, op0) : NULL_RTX;
+ if (op0 == op1)
+ d.op1 = op1;
+ else
+ d.op1 = op1 ? force_reg (vmode, op1) : NULL_RTX;
+ d.testing_p = !target;
+
+ /* The pattern matching functions above are written to look for a small
+ number to begin the sequence (0, 1, N/2). If we begin with an index
+ from the second operand, we can swap the operands. */
+ poly_int64 nelt = d.perm.length ();
+ if (known_ge (d.perm[0], nelt))
+ {
+ d.perm.rotate_inputs (1);
+ std::swap (d.op0, d.op1);
+ }
+ if (known_gt (nelt, 1))
+ {
+ if (arc64_simd_dup (&d))
+ return true;
+ else if (arc64_simd_vpack (&d))
+ return true;
+ else if (arc64_simd_swapl (&d))
+ return true;
+ else if (arc64_simd_swap (&d))
+ return true;
+ else if (arc64_simd_vpack2wl (&d))
+ return true;
+ else if (arc64_simd_vpack2wm (&d))
+ return true;
+ else if (arc64_simd_exch (&d))
+ return true;
+ else if (arc64_simd_unpk (&d))
+ return true;
+ else if (arc64_simd_pack (&d))
+ return true;
+ else if (arc64_simd_bfly (&d))
+ return true;
+ else if (arc64_simd_lane_pack (&d))
+ return true;
+ }
+ return false;
+}
+
+/* Provide the costs of an addressing mode that contains ADDR.
+ SPEED is true when we optimize for speed rather than size. */
+
+static int
+arc64_address_cost (rtx addr, machine_mode mode,
+ addr_space_t as ATTRIBUTE_UNUSED,
+ bool speed)
+{
+ const int cost_limm = speed ? 0 : COSTS_N_INSNS (1);
+
+ if (CONSTANT_P (addr))
+ return cost_limm;
+
+ /* The cheapest constructs are the addresses which fit a store
+ instruction (or an fp load/store instruction). */
+ if (arc64_legitimate_address_1_p (mode, addr, true, false, true))
+ switch (GET_CODE (addr))
+ {
+ case PRE_DEC:
+ case PRE_INC:
+ case POST_DEC:
+ case POST_INC:
+ case PRE_MODIFY:
+ case POST_MODIFY:
+ return 0;
+
+ default:
+ return 1;
+ }
+
+ /* Anything else has a limm. */
+ return cost_limm + 2;
+}
+
+/* Compute the rtx cost. */
+
+static bool
+arc64_rtx_costs (rtx x, machine_mode mode, rtx_code outer,
+ int opno ATTRIBUTE_UNUSED, int *cost, bool speed)
+{
+ rtx op0, op1;
+ const int cost_limm = speed ? 0 : COSTS_N_INSNS (1);
+ int factor;
+
+ /* If we use a mode larger than UNITS_PER_WORD, factor it in. N.B. The cost
+ is already factored in; however, the costs for MULT and DIV are too large. */
+ factor = CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+
+ switch (GET_CODE (x))
+ {
+ case SET:
+ op0 = SET_DEST (x);
+ op1 = SET_SRC (x);
+
+ switch (GET_CODE (op0))
+ {
+ case MEM:
+ /* Store instruction. */
+
+ if ((factor == 2) && DOUBLE_LOAD_STORE)
+ *cost = COSTS_N_INSNS (1);
+ *cost += arc64_address_cost (XEXP (op0, 0), mode, 0, speed);
+ if (CONST_INT_P (op1))
+ {
+ *cost += speed ? 0 :
+ satisfies_constraint_S06S0 (op1) ? 0 : cost_limm;
+ return true;
+ }
+
+ *cost += rtx_cost (op1, mode, SET, 1, speed);
+ return true;
+
+ case SUBREG:
+ if (!REG_P (SUBREG_REG (op0)))
+ *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
+
+ /* Fall through. */
+ case REG:
+ /* Cost is just the cost of the RHS of the set. */
+ *cost += rtx_cost (op1, mode, SET, 1, speed);
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+
+ case MEM:
+ /* Generic/loads. */
+
+ if ((factor == 2) && DOUBLE_LOAD_STORE)
+ *cost = COSTS_N_INSNS (1);
+ *cost += arc64_address_cost (XEXP (x, 0), mode, 0, speed);
+ return true;
+
+ case MINUS:
+ case PLUS:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if ((mode != SImode) && (mode != DImode))
+ *cost += 1;
+
+ /* Check if we have add{1,2,3} instruction. */
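+ /* E.g. (illustrative) rA + (rB << 2), equivalently rA + rB * 4, can be
+ emitted as a single scaled-add (add2) instruction. */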
+ if ((GET_CODE (op0) == ASHIFT
+ && _1_2_3_operand (XEXP (op0, 1), VOIDmode))
+ || (GET_CODE (op0) == MULT
+ && _2_4_8_operand (XEXP (op0, 1), VOIDmode)))
+ {
+ /* Check if the 2nd instruction operand is a constant int. This
+ always goes as a limm. */
+ if (CONST_INT_P (op1))
+ *cost += cost_limm;
+ }
+ return true;
+ break;
+
+ case COMPARE:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* Virtually any instruction can do a compare with zero. */
+ if (op1 == const0_rtx)
+ *cost = 0;
+ return true;
+
+ case ZERO_EXTEND:
+ op0 = XEXP (x, 0);
+
+ /* Zero extending from an SI operation is cheap. */
+ if (MEM_P (op0))
+ {
+ /* All loads can zero extend to any size for free. */
+ *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
+ return true;
+ }
+ if (mode == DImode
+ && GET_MODE (op0) == SImode
+ && outer == SET)
+ {
+ int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
+ if (op_cost)
+ *cost = op_cost;
+ return true;
+ }
+ break;
+
+ case SIGN_EXTEND:
+ op0 = XEXP (x, 0);
+ if (MEM_P (op0))
+ {
+ /* All loads can sign extend to any size for free. */
+ *cost = rtx_cost (op0, VOIDmode, SIGN_EXTEND, 0, speed);
+ return true;
+ }
+ *cost += COSTS_N_INSNS (2);
+ break;
+
+ case CONST_INT:
+ {
+ HOST_WIDE_INT imm = INTVAL (x);
+
+ /* In general any 32bit constant can be loaded immediately,
+ however, when we compile for speed, we try to avoid
+ them. */
+ *cost = 0;
+ if (UNSIGNED_INT6 (imm))
+ return true;
+ else
+ switch (outer)
+ {
+ case SET:
+ if (SIGNED_INT12 (imm))
+ return true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ /* FALLTHRU */
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *cost = cost_limm;
+ return true;
+
+ case LSHIFTRT:
+ op0 = XEXP (x, 0);
+ if (REG_P (op0))
+ return true;
+ break;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ return true;
+
+ case MULT:
+ op0 = XEXP (x, 0);
+ /* Multiplication has a large latency; prefer adds and shifts. */
+ *cost = COSTS_N_INSNS (2);
+ /* 64x64 multiplication is expensive. */
+ if (GET_MODE_SIZE (mode) != UNITS_PER_WORD
+ && (GET_CODE (op0) != ZERO_EXTEND
+ && GET_CODE (op0) != SIGN_EXTEND))
+ *cost = COSTS_N_INSNS (3);
+ else if (GET_MODE_SIZE (mode) == UNITS_PER_WORD * 2)
+ *cost = factor * COSTS_N_INSNS (4);
+
+ return true;
+
+ case MOD:
+ case UMOD:
+ case DIV:
+ case UDIV:
+ /* Favor synthetic divisions. */
+ *cost = factor * COSTS_N_INSNS (12);
+ return true;
+
+ case EQ:
+ case NE:
+ if (outer == IF_THEN_ELSE
+ && (GET_CODE (XEXP (x, 0)) == AND
+ || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT)
+ && XEXP (x, 1) == const0_rtx)
+ {
+ *cost = 0;
+ return true;
+ }
+ break;
+
+ case AND:
+ case XOR:
+ case IOR:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if ((REG_P (op0) || REG_P (op1))
+ && (CONST_INT_P (op0) || CONST_INT_P (op1)))
+ return true;
+
+ /* Detect VPACK2HL instructions. */
+ if (TARGET_SIMD
+ && GET_CODE (op0) == AND
+ && GET_CODE (op1) == ASHIFT
+ && mode == E_SImode)
+ return true;
+
+ break;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Wrapper around arc64_rtx_costs, dumps the partial, or total cost
+ calculated for X. This cost is stored in *COST. Returns true
+ if the total cost of X was calculated. */
+static bool
+arc64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
+ int param, int *cost, bool speed)
+{
+ bool result = arc64_rtx_costs (x, mode, (rtx_code) outer, param, cost, speed);
+
+ if (dump_file)
+ {
+ print_rtl_single (dump_file, x);
+ fprintf (dump_file, "\nARC: %s cost: %d (%s)\n",
+ speed ? "Speed" : "Size",
+ *cost, result ? "final" : "partial");
+ }
+
+ return result;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
+ instruction fusion of some sort. */
+
+static bool
+arc64_macro_fusion_p (void)
+{
+ /* When we use accumulators, make sure we schedule the producer/consumer of
+ the accumulator close to each other. */
+ return TARGET_SIMD;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
+ should be kept together during scheduling. */
+
+static bool
+arc64_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+ rtx prev_set = single_set (prev);
+ rtx curr_set = single_set (curr);
+ /* prev and curr are simple SET insns i.e. no flag setting or branching. */
+ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+ if (!arc64_macro_fusion_p ())
+ return false;
+
+ /* Don't handle anything with a jump. FIXME! maybe it is interesting to keep
+ the cmp and jcc together for later folding into a BRcc insn. */
+ if (!simple_sets_p)
+ return false;
+
+ /* First, try to match any MPY instruction which can have an implicit
+ accumulator write, followed by any MAC instruction. */
+ if (get_attr_type (prev) == TYPE_MPY
+ && get_attr_type (curr) == TYPE_MAC)
+ return true;
+
+ /* Second, try to match any back-to-back MAC instructions. */
+ if (get_attr_type (prev) == TYPE_MAC
+ && (get_attr_type (curr) == TYPE_MAC))
+ return true;
+ if (get_attr_type (prev) == TYPE_VMAC2H
+ && (get_attr_type (curr) == TYPE_VMAC2H))
+ return true;
+
+ /* Third, keep the MAC and the following MOV(L) rx,r58 close to each other.
+ This pattern will be matched in machine reorg and simplified to a single
+ MAC instruction. */
+ if (get_attr_type (curr) == TYPE_MOVE
+ && REG_P (SET_SRC (curr_set))
+ && REGNO (SET_SRC (curr_set)) == R58_REGNUM
+ && get_attr_type (prev) == TYPE_MAC)
+ return true;
+
+#if 0
+ /* Try to keep r58 setting close to any previous related instruction. We may
+ be able to merge those two into one instruction. */
+ rtx set_dest;
+ set_dest = SET_DEST (curr_set);
+ if (get_attr_type (curr) == TYPE_MOVE
+ && REG_P (set_dest)
+ && REGNO (set_dest) == R58_REGNUM
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_SRC (curr_set))
+ && REGNO (SET_DEST (prev_set)) == REGNO (SET_SRC (curr_set)))
+ return true;
+
+ /* Try to keep any mac and any previous instruction close, dependency on add
+ operand. */
+ if (get_attr_type (curr) == TYPE_MAC
+ && REG_P (SET_DEST (prev_set))
+ && GET_CODE (SET_SRC (curr_set)) == PLUS
+ && REG_P (XEXP (SET_SRC (curr_set), 1))
+ && REGNO (SET_DEST (prev_set)) != R58_REGNUM
+ && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1)))
+ return true;
+#endif
+ return false;
+}
+
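+/* Parse the -mcpu= string and adjust the target flags accordingly. For
+ example (illustrative, derived from the checks below): a string starting
+ with "hs6" selects the 64-bit ISA, and an '8' in the fourth position
+ (say, a hypothetical "hs68") additionally enables wide loads/stores
+ (or -mll64 on 32-bit targets) and SIMD. */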
+static void
+arc64_override_options (void)
+{
+ if (arcv3_cpu_string)
+ {
+ const char *p = arcv3_cpu_string;
+ if (strncmp (p, "hs5", 3) == 0)
+ TARGET_64BIT = false;
+ else if (strncmp (p, "hs6", 3) == 0)
+ TARGET_64BIT = true;
+ else
+ error ("%<-mcpu=%s%>s is not a valid CPU option.", arcv3_cpu_string);
+ p += 3;
+ if (*p == '8')
+ {
+ if (TARGET_64BIT)
+ {
+ target_flags |= MASK_WIDE_LDST;
+ }
+ else
+ {
+ target_flags |= MASK_LL64;
+ }
+ target_flags |= MASK_SIMD;
+ }
+ }
+
+ if (TARGET_LL64 && TARGET_64BIT)
+ {
+ target_flags &= ~MASK_LL64;
+ warning (0, "Option -mll64 is ignored because the target"
+ " is not 32-bit.");
+ }
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+arc64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = CC_REGNUM;
+ *p2 = INVALID_REGNUM;
+ return true;
+}
+
+/* Return true if FUNC is a naked function. */
+static bool
+arc64_naked_function_p (tree func)
+{
+ tree func_decl = func;
+ if (func == NULL_TREE)
+ func_decl = current_function_decl;
+ return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
+}
+
+/* Implement 'TARGET_SET_CURRENT_FUNCTION'. */
+
+static void
+arc64_set_current_function (tree decl)
+{
+ unsigned int fn_type = ARC64_FUNCTION_UNKNOWN;
+ tree func_decl = decl;
+
+ if (decl == NULL_TREE
+ || current_function_decl == NULL_TREE
+ || current_function_decl == error_mark_node
+ || ! cfun->machine
+ || cfun->machine->fn_type != ARC64_FUNCTION_UNKNOWN)
+ return;
+
+ /* Check if it is a naked function. */
+ if (arc64_naked_function_p (decl))
+ fn_type |= ARC64_FUNCTION_NAKED;
+
+ if (func_decl == NULL_TREE)
+ func_decl = current_function_decl;
+
+ /* Now see if this is an interrupt handler. */
+ if (lookup_attribute ("interrupt",
+ TYPE_ATTRIBUTES (TREE_TYPE (func_decl))) != NULL_TREE)
+ fn_type |= ARC64_FUNCTION_ILINK;
+
+ if (!ARC_NAKED_P (fn_type) && !ARC_INTERRUPT_P (fn_type))
+ fn_type |= ARC64_FUNCTION_NORMAL;
+
+ cfun->machine->fn_type = fn_type;
+
+ if (ARC_NAKED_P (fn_type) && ARC_INTERRUPT_P (fn_type))
+ error ("function attributes %qs and %qs are mutually exclusive",
+ "interrupt", "naked");
+}
+
+/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
+static bool
+arc64_allocate_stack_slots_for_args ()
+{
+ /* Naked functions should not allocate stack slots for arguments. */
+ return !arc64_naked_function_p (current_function_decl);
+}
+
+/* Implement TARGET_WARN_FUNC_RETURN. */
+static bool
+arc64_warn_func_return (tree decl)
+{
+ /* Naked functions are implemented entirely in assembly, including the
+ return sequence, so suppress warnings about this. */
+ return !arc64_naked_function_p (decl);
+}
+
+/* Return false for selected jumps crossing between hot and cold partitions. */
+
+static bool
+arc64_can_follow_jump (const rtx_insn *br1, const rtx_insn *br2)
+{
+ /* Avoid compiler warnings. */
+ union {const rtx_insn *c; rtx_insn *r;} u;
+
+ u.c = br1;
+ if (flag_reorder_blocks_and_partition
+ && CROSSING_JUMP_P (br2))
+ switch (get_attr_type (u.r))
+ {
+ case TYPE_BRANCHCC:
+ case TYPE_BRCC:
+ return false;
+ case TYPE_BRANCH:
+ if (get_attr_length (u.r) == 2)
+ return false;
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
+/* Implements target hook TARGET_SCHED_ISSUE_RATE. */
+
+static int
+arc64_sched_issue_rate (void)
+{
+ return 2;
+}
+
+/*
+ Global functions.
+*/
+
+/* Return TRUE if SYM (the callee) should be treated as a long call
+ (i.e., called via a register). */
+
+bool
+arc64_is_long_call_p (rtx sym)
+{
+ arc64_symb symb_t = arc64_get_symbol_type (sym);
+
+ /* No subtleties for the time being, if user asks for large memory model,
+ everything goes via regs. */
+ if (!TARGET_64BIT
+ && (arc64_cmodel_var == ARC64_CMODEL_LARGE))
+ return true;
+
+ switch (symb_t)
+ {
+ case ARC64_UNK:
+ case ARC64_LO32:
+ return false;
+
+ case ARC64_PCREL:
+ case ARC64_PIC:
+ return false;
+
+ case ARC64_LPIC:
+ /* fPIC + Large memory model forces everything in registers. */
+ return (arc64_cmodel_var == ARC64_CMODEL_LARGE) ? true : false;
+
+ case ARC64_LARGE:
+ return true;
+
+ case ARC64_TLS:
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* X and Y are two things to compare using CODE. Emit the compare insn and
+ return the rtx for the cc reg in the proper mode. */
+
+rtx
+arc64_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
+{
+ machine_mode mode = SELECT_CC_MODE (code, x, y);
+ rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+
+ if (CONSTANT_P (x) && CONSTANT_P (y))
+ x = force_reg (word_mode, x);
+
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+ return cc_reg;
+}
+
+/* Prepare operands for move in MODE. Return true iff the move has
+ been emitted. */
+
+bool
+arc64_prepare_move_operands (rtx op0, rtx op1, machine_mode mode)
+{
+ if (MEM_P (op0) && !REG_P (op1))
+ {
+ if (mode == E_DImode
+ || !satisfies_constraint_S06S0 (op1))
+ op1 = force_reg (mode, op1);
+ }
+ else if (GET_MODE_SIZE (mode) == UNITS_PER_WORD
+ && CONSTANT_P (op1))
+ {
+ unsigned HOST_WIDE_INT lo;
+ unsigned HOST_WIDE_INT hi;
+ rtx tmp;
+
+ switch (GET_CODE (op1))
+ {
+ case CONST_INT:
+ gcc_assert (mode == Pmode);
+ if (!SIGNED_INT32 (INTVAL (op1)) && !UNSIGNED_INT32 (INTVAL (op1)))
+ {
+ HOST_WIDE_INT val;
+ /* We have a large 64bit immediate:
+ movhl rA, (val64 >> 32)
+ orl rA,rA, (val64 & 0xffffffff)
+ FIXME! add strategies to minimize the size. */
+
+ val = INTVAL (op1);
+ lo = zext_hwi (val, 32);
+ hi = zext_hwi (val >> 32, 32);
+ tmp = op0;
+
+ if (can_create_pseudo_p ())
+ tmp = gen_reg_rtx (mode);
+
+ /* Maybe first do a constant move via movsi to get the
+ constants minimized. */
+ emit_insn (gen_rtx_SET (tmp,
+ gen_rtx_ASHIFT (mode, GEN_INT (hi),
+ GEN_INT (32))));
+ emit_insn (gen_rtx_SET (op0,
+ plus_constant (mode, tmp, lo)));
+ return true;
+ }
+ break;
+
+ case CONST_WIDE_INT:
+ gcc_unreachable ();
+
+ case CONST_DOUBLE:
+ if (mode == SFmode)
+ return false;
+ else
+ {
+ long res[2];
+ unsigned HOST_WIDE_INT ival;
+ scalar_int_mode imode = int_mode_for_mode (mode).require ();
+
+ gcc_assert (mode == DFmode);
+
+ real_to_target (res, CONST_DOUBLE_REAL_VALUE (op1),
+ REAL_MODE_FORMAT (mode));
+ lo = zext_hwi (res[0], 32);
+ hi = zext_hwi (res[1], 32);
+
+ ival = lo | (hi << 32);
+ tmp = gen_reg_rtx (imode);
+ emit_move_insn (tmp, gen_int_mode (ival, imode));
+ emit_move_insn (op0, gen_lowpart (mode, tmp));
+ return true;
+ }
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ op1 = arc64_legitimize_address_1 (op1, op0);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Check and fix unsupported store addresses. */
+ if (MEM_P (op0)
+ && !arc64_legitimate_address_1_p (mode, XEXP (op0, 0), false,
+ false, true))
+ {
+ rtx tmp = gen_reg_rtx (Pmode);
+ rtx addr = XEXP (op0, 0);
+ rtx t0 = XEXP (addr, 0);
+ rtx t1 = XEXP (addr, 1);
+
+ if (GET_CODE (t0) == MULT)
+ {
+ rtx ta = XEXP (t0, 0);
+ rtx tb = XEXP (t0, 1);
+ t0 = gen_rtx_ASHIFT (Pmode, ta,
+ GEN_INT (ARC64LOG2 (INTVAL (tb))));
+ }
+
+ emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (Pmode, t0, t1)));
+ op0 = replace_equiv_address (op0, tmp);
+ }
+ emit_insn (gen_rtx_SET (op0, op1));
+ return true;
+}
+
+/* Split a move with a long immediate into smaller, size-friendly
+ instructions. */
+#if 0
+bool
+arc64_split_mov_const (rtx *operands)
+{
+ unsigned HOST_WIDE_INT ival;
+ HOST_WIDE_INT shimm;
+ machine_mode mode = GET_MODE (operands[0]);
+
+ /* Manage a constant. */
+ gcc_assert (CONST_INT_P (operands[1]));
+ ival = INTVAL (operands[1]) & 0xffffffff;
+
+ if (SIGNED_INT12 (ival))
+ return false;
+
+ /* 1. Check if we can just rotate limm by 8 but using ROR8. */
+ if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f000000) == 0))
+ {
+ shimm = (ival >> 24) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ROTATERT (mode, GEN_INT (shimm),
+ GEN_INT (8))));
+ return true;
+ }
+ /* 2. Check if we can just shift by 8 to fit into the u6 of LSL8. */
+ if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f00) == 0))
+ {
+ shimm = (ival >> 8) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ASHIFT (mode, GEN_INT (shimm),
+ GEN_INT (8))));
+ return true;
+ }
+
+ /* 3. Check if we can just shift by 16 to fit into the u6 of LSL16. */
+ if (TARGET_BARREL_SHIFTER && ((ival & ~0x3f0000) == 0))
+ {
+ shimm = (ival >> 16) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ASHIFT (mode, GEN_INT (shimm),
+ GEN_INT (16))));
+ return true;
+ }
+
+ /* 4. Check if we can do something like mov_s h,u8 / asl_s ra,h,#nb. */
+ if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0
+ && TARGET_BARREL_SHIFTER)
+ {
+ HOST_WIDE_INT shift = __builtin_ffs (ival);
+ shimm = (ival >> (shift - 1)) & 0xff;
+ emit_insn (gen_rtx_SET (operands[0], GEN_INT (shimm)));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ASHIFT (mode, operands[0],
+ GEN_INT (shift - 1))));
+ return true;
+ }
+
+ /* 5. Check if we can just rotate the limm, useful when no barrel
+ shifter is present. */
+ if ((ival & ~0x8000001f) == 0)
+ {
+ shimm = (ival * 2 + 1) & 0x3f;
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_ROTATERT (mode, GEN_INT (shimm),
+ const1_rtx)));
+ return true;
+ }
+
+ /* 6. Check if we can do something with bmask. */
+ if (IS_POWEROF2_P (ival + 1))
+ {
+ emit_insn (gen_rtx_SET (operands[0], constm1_rtx));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_AND (mode, operands[0],
+ GEN_INT (ival))));
+ return true;
+ }
+
+ return false;
+}
+
+/* Helper to check Cax constraint. */
+
+bool
+arc64_check_mov_const (HOST_WIDE_INT ival)
+{
+ ival = ival & 0xffffffff;
+
+ if ((ival & ~0x8000001f) == 0)
+ return true;
+
+ if (IS_POWEROF2_P (ival + 1))
+ return true;
+
+ /* The next rules requires a barrel shifter. */
+ if (!TARGET_BARREL_SHIFTER)
+ return false;
+
+ if (((ival >> (__builtin_ffs (ival) - 1)) & 0xffffff00) == 0)
+ return true;
+
+ if ((ival & ~0x3f00) == 0)
+ return true;
+
+ if ((ival & ~0x3f0000) == 0)
+ return true;
+
+ if ((ival & ~0x3f000000) == 0)
+ return true;
+
+ return false;
+}
+#endif
+
+/* This function is used by the call expanders of the machine description.
+ RESULT is the register in which the result is returned. It's NULL for
+ "call" and "sibcall".
+ MEM is the location of the function call.
+ SIBCALL indicates whether this function call is normal call or sibling call.
+ It will generate different pattern accordingly. */
+
+void
+arc64_expand_call (rtx result, rtx mem, bool sibcall)
+{
+ rtx call, callee, tmp;
+ rtvec vec;
+ machine_mode mode;
+
+ gcc_assert (MEM_P (mem));
+ callee = XEXP (mem, 0);
+ mode = GET_MODE (callee);
+ gcc_assert (mode == Pmode || CONST_INT_P (callee));
+
+ /* Decide if we should generate indirect calls by loading the
+ address of the callee into a register before performing the
+ branch-and-link. */
+ if (arc64_is_long_call_p (callee) && !REG_P (callee))
+ XEXP (mem, 0) = force_reg (mode, callee);
+
+ call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
+
+ if (result != NULL_RTX)
+ call = gen_rtx_SET (result, call);
+
+ if (sibcall)
+ tmp = ret_rtx;
+ else
+ tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, BLINK_REGNUM));
+
+ vec = gen_rtvec (2, call, tmp);
+ call = gen_rtx_PARALLEL (VOIDmode, vec);
+
+ emit_call_insn (call);
+}
+
+/* Return nonzero if this function is known to have a null epilogue.
+ This allows the optimizer to omit jumps to jumps if no stack
+ was created. */
+
+bool
+arc64_can_use_return_insn_p (void)
+{
+ return (reload_completed && cfun->machine->frame.frame_size == 0
+ && !ARC_INTERRUPT_P (cfun->machine->fn_type));
+}
+
+
+/* Return 1 if the register is used by the epilogue. We need to say the
+ return register is used, but only after epilogue generation is complete.
+ Note that in the case of sibcalls, the values "used by the epilogue" are
+ considered live at the start of the called function. */
+
+int
+arc64_epilogue_uses (int regno)
+{
+#ifdef HAVE_AS_TLS
+ if (regno == R30_REGNUM)
+ return 1;
+#endif
+
+ if (epilogue_completed)
+ {
+ if (regno == BLINK_REGNUM)
+ return 1;
+
+ /* An interrupt restores more registers. */
+ if (ARC_INTERRUPT_P (cfun->machine->fn_type)
+ && (df_regs_ever_live_p (regno)
+ || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno))))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return 1 if we use TP because it is alive on entry to an exception
+ edge. */
+
+int
+arc64_eh_uses (int regno ATTRIBUTE_UNUSED)
+{
+#ifdef HAVE_AS_TLS
+ if (regno == R30_REGNUM)
+ return 1;
+#endif
+ return 0;
+}
+
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
+ or argument pointer. TO is either the stack pointer or hard frame
+ pointer. */
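+
+/* A sketch of the frame layout implied by the offsets below (stack
+   grows downwards, higher addresses first):
+
+     incoming arguments
+   AP -->
+     callee-saved registers      saved_regs_size
+   HARD_FP (== soft FP) -->
+     local variables             saved_locals_size
+     outgoing arguments          saved_outargs_size
+   SP -->  */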
+
+HOST_WIDE_INT
+arc64_initial_elimination_offset (unsigned from, unsigned to)
+{
+ struct arc64_frame *frame = &cfun->machine->frame;
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return frame->saved_regs_size;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return (frame->saved_regs_size + frame->saved_locals_size
+ + frame->saved_outargs_size);
+
+ if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
+ return (frame->saved_locals_size + frame->saved_outargs_size);
+
+ if ((from == FRAME_POINTER_REGNUM) && (to == HARD_FRAME_POINTER_REGNUM))
+ return 0;
+
+ gcc_unreachable ();
+}
+
+/* Helper for INIT_EXPANDERS macro called to initialize any target
+ specific information. */
+
+void
+arc64_init_expanders (void)
+{
+ init_machine_status = arc64_init_machine_status;
+}
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a
+ COMPARE, return the mode to be used for the comparison. */
+
+machine_mode
+arc64_select_cc_mode (enum rtx_code op,
+ rtx x,
+ rtx y)
+{
+ machine_mode mode = GET_MODE (x);
+
+ /* Matches all instructions which can do .f and clobber only the Z flag. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && y == const0_rtx
+ && GET_CODE (x) == MULT
+ && (op == EQ || op == NE))
+ return CC_Zmode;
+
+ /* Matches all instructions which can do .f and clobber the Z and N
+ flags. Because we compare with zero, for LT we can use "mi" and
+ for GE we can use "pl". We cannot use GT with "pnz" because it
+ cannot be reversed. */
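+ /* For example, a signed "x < 0" test can be carried out by the
+ flag-setting (.f) form of the instruction that defines x, followed
+ by a branch or conditional instruction using the "mi" condition. */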
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && y == const0_rtx
+ && (op == EQ || op == NE || op == LT || op == GE))
+ return CC_ZNmode;
+
+ /* All floating point compares return CC_FPU if it is an equality
+ comparison, and CC_FPUE otherwise. N.B. LTGT and UNEQ cannot be
+ directly mapped to fcmp instructions. */
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ switch (op)
+ {
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ return CC_FPUmode;
+
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ case LTGT:
+ return CC_FPUEmode;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return CCmode;
+}
+
+/* Implement RETURN_ADDR_RTX. We do not support moving back to a
+ previous frame. */
+
+rtx
+arc64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return const0_rtx;
+ return get_hard_reg_initial_val (Pmode, BLINK_REGNUM);
+}
+
+/* Expand the "prologue" pattern. */
+
+void
+arc64_expand_prologue (void)
+{
+ HOST_WIDE_INT frame_allocated;
+ struct arc64_frame *frame = &cfun->machine->frame;
+
+ if (flag_stack_usage_info)
+ current_function_static_stack_size = frame->frame_size;
+
+ if (ARC_NAKED_P (cfun->machine->fn_type))
+ return;
+
+ frame_allocated = frame->frame_size;
+
+ frame_allocated -= arc64_save_callee_saves ();
+
+ /* If anything is left, allocate it. */
+ if (frame_allocated > 0)
+ frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated);
+
+ /* Emit a blockage. */
+ emit_insn (gen_blockage ());
+}
+
+/* Expand "epilogue" pattern. */
+
+void
+arc64_expand_epilogue (bool sibcall_p)
+{
+ HOST_WIDE_INT frame_deallocated;
+ struct arc64_frame *frame = &cfun->machine->frame;
+
+ if (ARC_NAKED_P (cfun->machine->fn_type))
+ {
+ emit_jump_insn (gen_return ());
+ return;
+ }
+
+ frame_deallocated = frame->frame_size;
+ frame_deallocated -= arc64_restore_callee_saves (sibcall_p);
+
+ if (frame_deallocated != 0)
+ frame_stack_add (frame_deallocated);
+
+ /* For frames that use __builtin_eh_return, the register defined by
+ EH_RETURN_STACKADJ_RTX is set to 0 for all standard return paths.
+ On eh_return paths however, the register is set to the value that
+ should be added to the stack pointer in order to restore the
+ correct stack pointer for the exception handling frame.
+
+ For ARC64 we use r4 for EH_RETURN_STACKADJ_RTX; add it to the
+ stack pointer on eh_return paths. */
+ if (crtl->calls_eh_return)
+ emit_insn (gen_add2_insn (stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+ if (ARC_INTERRUPT_P (cfun->machine->fn_type))
+ emit_jump_insn (gen_rtie ());
+ else if (!sibcall_p)
+ emit_jump_insn (gen_simple_return ());
+}
+
+/* Helper used to determine if an address requires a long immediate.
+ To be used when computing the length of a load/store
+ instruction. */
+
+bool
+arc64_limm_addr_p (rtx op)
+{
+ if (!MEM_P (op))
+ return false;
+
+ /* Only look at the address once we know OP really is a MEM. */
+ rtx addr = XEXP (op, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST_INT:
+ case CONST:
+ case UNSPEC:
+ case LO_SUM:
+ return true;
+
+ case PRE_INC:
+ case PRE_DEC:
+ case POST_INC:
+ case POST_DEC:
+ case PRE_MODIFY:
+ case POST_MODIFY:
+ case PLUS:
+ /* The legitimate-address check doesn't recognize the [b,limm]
+ variant of st. Hence, use it to determine whether the address
+ contains a limm. */
+ return !arc64_legitimate_address_1_p (GET_MODE (op), addr,
+ false, false, true);
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Used by move_dest_operand predicate. */
+
+bool
+arc64_legitimate_store_address_p (machine_mode mode, rtx addr)
+{
+ return arc64_legitimate_address_1_p (mode, addr, true, false, true);
+}
+
+/* Return true if an address fits a short load/store instruction. */
+
+bool
+arc64_short_access_p (rtx op, machine_mode mode, bool load_p)
+{
+ rtx addr, plus0, plus1;
+ bool f0, f1;
+
+ /* Eliminate non-memory operations. */
+ if (GET_CODE (op) != MEM)
+ return false;
+
+ /* FIXME! remove it when "uncached" attribute is added. */
+ if (MEM_VOLATILE_P (op) && TARGET_VOLATILE_DI)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ return check_short_insn_register_p (addr, false);
+
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+
+ f0 = check_short_insn_register_p (plus0, false);
+ f1 = check_short_insn_constant_p (plus1, mode);
+
+ /* Check for [Rb + shimm]. */
+ if (f0 && f1)
+ return true;
+
+ if (!load_p)
+ return false;
+
+ /* Check for [Rb + Ri]. */
+ f1 = check_short_insn_register_p (plus1, false);
+
+ if (f0 && f1)
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Return true if an address fits a floating point load/store
+ instruction. The following formats are allowed: [b, s9], [b],
+ [s32limm], and scaled [b, s9]. */
+
+bool
+arc64_fp_access_p (rtx op, machine_mode mode)
+{
+ rtx addr;
+
+ /* Eliminate non-memory operations. */
+ if (GET_CODE (op) != MEM)
+ return false;
+
+ /* FIXME! remove it when "uncached" attribute is added. */
+ if (MEM_VOLATILE_P (op) && TARGET_VOLATILE_DI)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+
+ return arc64_legitimate_address_1_p (mode, addr, true, false, false);
+}
+
+/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
+ normally or return to a previous frame after unwinding.
+
+ An EH return uses a single shared return sequence. The epilogue is
+ exactly like a normal epilogue except that it has an extra input
+ register (EH_RETURN_STACKADJ_RTX) which contains the stack
+ adjustment that must be applied after the frame has been destroyed.
+ An extra label is inserted before the epilogue which initializes
+ this register to zero, and this is the entry point for a normal
+ return.
+
+ An actual EH return updates the return address, initializes the
+ stack adjustment and jumps directly into the epilogue (bypassing
+ the zeroing of the adjustment). Since the return address is
+ typically saved on the stack when a function makes a call, the
+ saved BLINK must be updated outside the epilogue.
+
+ This poses problems as the store is generated well before the
+ epilogue, so the offset of BLINK is not known yet. Also
+ optimizations will remove the store as it appears dead, even after
+ the epilogue is generated (as the base or offset for loading BLINK
+ is different in many cases).
+
+ To avoid these problems this implementation forces the frame
+ pointer in eh_return functions so that the location of BLINK is
+ fixed and known early. It also marks the store volatile, so no
+ optimization is permitted to remove the store. */
+
+rtx
+arc64_eh_return_handler_rtx (void)
+{
+ rtx tmp = gen_frame_mem (Pmode,
+ plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
+
+ /* Mark the store volatile, so no optimization is permitted to remove it. */
+ MEM_VOLATILE_P (tmp) = true;
+ return tmp;
+}
+
+/* Select a format to encode pointers in exception handling data. */
+
+int
+arc64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
+{
+ int type;
+
+ if (!flag_pic)
+ return DW_EH_PE_absptr;
+
+ switch (arc64_cmodel_var)
+ {
+ case ARC64_CMODEL_SMALL:
+ case ARC64_CMODEL_MEDIUM:
+ /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
+ for everything. */
+ type = DW_EH_PE_sdata4;
+ break;
+ default:
+ /* No assumptions here. 8-byte relocs required. */
+ type = DW_EH_PE_sdata8;
+ break;
+ }
+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+}
+
+/* Emit a (pre) memory barrier around an atomic sequence according to
+ MODEL. */
+
+void
+arc64_pre_atomic_barrier (enum memmodel model)
+{
+ if (need_atomic_barrier_p (model, true))
+ emit_insn (gen_memory_barrier ());
+}
+
+/* Emit a (post) memory barrier around an atomic sequence according to
+ MODEL. */
+
+void
+arc64_post_atomic_barrier (enum memmodel model)
+{
+ if (need_atomic_barrier_p (model, false))
+ emit_insn (gen_memory_barrier ());
+}
+
+/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
+ to perform. MEM is the memory on which to operate. VAL is the second
+ operand of the binary operator. BEFORE and AFTER are optional locations to
+ return the value of MEM either before or after the operation. MODEL_RTX
+ is a CONST_INT containing the memory model to use. */
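+
+/* The emitted sequence is roughly (a sketch; assuming the exclusive
+   load/store patterns map to the LLOCK/SCOND family of instructions):
+
+     1: llock  before, [mem]
+        <op>   after, before, val
+        scond  after, [mem]
+        bnz    1b
+
+   wrapped in the pre/post memory barriers required by MODEL. */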
+
+void
+arc64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+ rtx orig_before, rtx orig_after, rtx model_rtx)
+{
+ enum memmodel model = (enum memmodel) INTVAL (model_rtx);
+ machine_mode mode = GET_MODE (mem);
+ rtx label, x, cond;
+ rtx before = orig_before, after = orig_after;
+
+ /* ARC atomic ops work only with 32-bit or 64-bit aligned memories. */
+ gcc_assert (mode == SImode || mode == DImode);
+
+ arc64_pre_atomic_barrier (model);
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ label = gen_rtx_LABEL_REF (VOIDmode, label);
+
+ if (before == NULL_RTX)
+ before = gen_reg_rtx (mode);
+
+ if (after == NULL_RTX)
+ after = gen_reg_rtx (mode);
+
+ /* Load exclusive. */
+ if (mode == SImode)
+ emit_insn (gen_arc_load_exclusivesi (before, mem));
+ else /* DImode */
+ emit_insn (gen_arc_load_exclusivedi (before, mem));
+
+ switch (code)
+ {
+ case NOT:
+ x = gen_rtx_AND (mode, before, val);
+ emit_insn (gen_rtx_SET (after, x));
+ x = gen_rtx_NOT (mode, after);
+ emit_insn (gen_rtx_SET (after, x));
+ break;
+
+ case MINUS:
+ if (CONST_INT_P (val))
+ {
+ val = GEN_INT (-INTVAL (val));
+ code = PLUS;
+ }
+
+ /* FALLTHRU. */
+ default:
+ x = gen_rtx_fmt_ee (code, mode, before, val);
+ emit_insn (gen_rtx_SET (after, x));
+ break;
+ }
+
+ /* Exclusively store new item. Store clobbers CC reg. */
+ if (mode == SImode)
+ emit_insn (gen_arc_store_exclusivesi (mem, after));
+ else /* DImode */
+ emit_insn (gen_arc_store_exclusivedi (mem, after));
+
+ /* Check the result of the store. */
+ cond = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ label, pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+ arc64_post_atomic_barrier (model);
+}
+
+/* Helper function used by "atomic_compare_and_swap" expand
+ pattern. */
+
+void
+arc64_expand_compare_and_swap (rtx operands[])
+{
+ rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
+ machine_mode mode;
+
+ bval = operands[0];
+ rval = operands[1];
+ mem = operands[2];
+ oldval = operands[3];
+ newval = operands[4];
+ is_weak = operands[5];
+ mod_s = operands[6];
+ mod_f = operands[7];
+ mode = GET_MODE (mem);
+
+ if (reg_overlap_mentioned_p (rval, oldval))
+ oldval = copy_to_reg (oldval);
+
+ if (mode == SImode || mode == DImode)
+ {
+ if (mode == SImode)
+ emit_insn (gen_atomic_compare_and_swapsi_1 (rval, mem, oldval, newval,
+ is_weak, mod_s, mod_f));
+ else /* DImode */
+ emit_insn (gen_atomic_compare_and_swapdi_1 (rval, mem, oldval, newval,
+ is_weak, mod_s, mod_f));
+
+ x = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_EQ (SImode, x, const0_rtx);
+ emit_insn (gen_rtx_SET (bval, x));
+ }
+ else
+ {
+ arc_expand_compare_and_swap_qh (bval, rval, mem, oldval, newval,
+ is_weak, mod_s, mod_f);
+ }
+}
+
+/* Helper function used by the "atomic_compare_and_swapsdi_1"
+ pattern. */
+
+void
+arc64_split_compare_and_swap (rtx operands[])
+{
+ rtx rval, mem, oldval, newval;
+ machine_mode mode, mode_cc;
+ enum memmodel mod_s, mod_f;
+ bool is_weak;
+ rtx label1, label2, x, cond;
+
+ rval = operands[0];
+ mem = operands[1];
+ oldval = operands[2];
+ newval = operands[3];
+ is_weak = (operands[4] != const0_rtx);
+ mod_s = (enum memmodel) INTVAL (operands[5]);
+ mod_f = (enum memmodel) INTVAL (operands[6]);
+ mode = GET_MODE (mem);
+
+ /* ARC atomic ops work only with 32-bit or 64-bit aligned memories. */
+ gcc_assert (mode == SImode || mode == DImode);
+
+ arc64_pre_atomic_barrier (mod_s);
+
+ label1 = NULL_RTX;
+ if (!is_weak)
+ {
+ label1 = gen_label_rtx ();
+ emit_label (label1);
+ }
+ label2 = gen_label_rtx ();
+
+ /* Load exclusive. */
+ if (mode == SImode)
+ emit_insn (gen_arc_load_exclusivesi (rval, mem));
+ else /* DImode */
+ emit_insn (gen_arc_load_exclusivedi (rval, mem));
+
+ /* Check if it is oldval. */
+ mode_cc = SELECT_CC_MODE (NE, rval, oldval);
+ cond = gen_rtx_REG (mode_cc, CC_REGNUM);
+ emit_insn (gen_rtx_SET (cond, gen_rtx_COMPARE (mode_cc, rval, oldval)));
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+
+ /* Exclusively store new item. Store clobbers CC reg. */
+ if (mode == SImode)
+ emit_insn (gen_arc_store_exclusivesi (mem, newval));
+ else /* DImode */
+ emit_insn (gen_arc_store_exclusivedi (mem, newval));
+
+ if (!is_weak)
+ {
+ /* Check the result of the store. */
+ cond = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+ }
+
+ if (mod_f != MEMMODEL_RELAXED)
+ emit_label (label2);
+
+ arc64_post_atomic_barrier (mod_s);
+
+ if (mod_f == MEMMODEL_RELAXED)
+ emit_label (label2);
+}
+
+/* Expander for casesi. The vector table is always PC-relative and is
+ made up of branch instructions. When the CODE_DENSITY option is
+ enabled we use the BI instruction; otherwise, depending on the
+ memory model, an emulation of it. The same emulation construction
+ is used for PIC and for the LARGE memory model. For a non-PIC
+ SMALL/MEDIUM memory model we make use of a single add2 instruction
+ which takes the address of the start of the dispatch table as one
+ input and the index into the table as the other. */
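+
+/* A rough sketch of the non-PIC SMALL-model expansion (the exact
+   instructions come from the casesi_* patterns in arc64.md):
+
+     sub   idx, idx, <lower bound>      ; normalise the index
+     <compare-and-branch idx > range to the default label>
+     add2  tmp, @table, idx
+     <dispatch through tmp into the table of branches>
+
+   The PIC and LARGE-model variants first materialise the table
+   address with a PC-relative sequence and then do the same add and
+   dispatch in DImode. */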
+
+void
+arc64_expand_casesi (rtx operands[])
+{
+ rtx reg;
+
+ if (operands[1] != const0_rtx)
+ {
+ reg = gen_reg_rtx (SImode);
+ operands[1] = GEN_INT (trunc_int_for_mode (-INTVAL (operands[1]),
+ SImode));
+ emit_insn (gen_addsi3 (reg, operands[0], operands[1]));
+ operands[0] = reg;
+ }
+ emit_unlikely_jump (gen_cbranchsi4 (gen_rtx_GTU (SImode, operands[0],
+ operands[2]),
+ operands[0], operands[2], operands[4]));
+
+ if (!TARGET_CODE_DENSITY)
+ {
+ switch (arc64_cmodel_var)
+ {
+ case ARC64_CMODEL_SMALL:
+ if (!flag_pic)
+ {
+ reg = gen_reg_rtx (SImode);
+ emit_insn (gen_casesi_addaddr (reg, operands[0], operands[3]));
+ operands[0] = reg;
+ break;
+ }
+ /* Fall through */
+ case ARC64_CMODEL_MEDIUM:
+ case ARC64_CMODEL_LARGE:
+ {
+ gcc_assert (word_mode == DImode);
+ /* Same code is used for PIC and large memory model. */
+ rtx lbl = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ rtx tmp = gen_reg_rtx (DImode);
+ reg = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (reg,
+ gen_rtx_UNSPEC (DImode,
+ gen_rtvec (1, lbl),
+ ARC64_UNSPEC_PCREL)));
+ emit_insn (gen_casesi_addaddrdi (tmp, operands[0], reg));
+ emit_jump_insn (gen_casesi_dispatchdi (tmp, operands[3]));
+ return;
+ }
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ emit_jump_insn (gen_casesi_dispatch (operands[0], operands[3]));
+}
+
+bool
+arc64_allow_direct_access_p (rtx op)
+{
+ return (arc64_get_symbol_type (op) == ARC64_LO32);
+}
+
+/* Decide whether a SIMD mov instruction needs to be split. Return
+ TRUE if so. This procedure is required when the vector length is
+ larger than 64 bits. */
+bool
+arc64_split_double_move_p (rtx *operands, machine_mode mode)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ /* Split only double moves. */
+ if (GET_MODE_SIZE (mode) < (UNITS_PER_WORD * 2))
+ return false;
+
+ if (register_operand (op0, mode) && register_operand (op1, mode))
+ {
+ /* Check if we can use vadd2 instruction as a mov. */
+ if (TARGET_SIMD
+ && !FLOAT_MODE_P (mode)
+ && !TARGET_64BIT
+ && (GET_MODE_SIZE (mode) == (2 * UNITS_PER_WORD)))
+ {
+ /* If both registers are even-numbered, fall back to vadd2. */
+ if (((REGNO (op0) & 0x01) == 0) && ((REGNO (op1) & 0x01) == 0))
+ return false;
+ else
+ return true;
+ }
+
+ /* Check for r-reg to f-reg moves. */
+ if (GP_REGNUM_P (REGNO (op0)) || GP_REGNUM_P (REGNO (op1)))
+ return true;
+
+ /* Sanity check for vfmov instruction. */
+ gcc_assert (arc64_fsimd_register (op0, mode)
+ && arc64_fsimd_register (op1, mode));
+ return false;
+ }
+
+ /* Check if we have 64/128bit moves. */
+ if (DOUBLE_LOAD_STORE
+ && ((memory_operand (op0, mode) && REG_P (op1))
+ || (memory_operand (op1, mode) && REG_P (op0))))
+ {
+ gcc_assert (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2));
+ /* Sanity check for wide st/ld instructions. */
+ if (REG_P (op0) && ((REGNO (op0) & 0x01) != 0))
+ return true;
+ if (REG_P (op1) && ((REGNO (op1) & 0x01) != 0))
+ return true;
+ return false;
+ }
+
+ /* Everything else goes through a split. */
+ return true;
+}
+
+/* This is the actual routine which splits a SIMD move into smaller
+ pieces. */
+void
+arc64_split_double_move (rtx *operands, machine_mode mode)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx lo, hi, mem_lo, mem_hi, src, dst;
+ unsigned int rdst, rsrc, i;
+ unsigned iregs = CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+ bool swap_p = false;
+ machine_mode mvmode = smallest_int_mode_for_size (BITS_PER_WORD);
+
+ /* Maximum size handled is twice UNITS_PER_WORD. */
+ gcc_assert (iregs <= 2);
+
+ /* This procedure works as long as the width of the fp regs is the
+ same as the width of r regs. */
+ if (FLOAT_MODE_P (mode))
+ {
+ gcc_assert (UNITS_PER_WORD == UNITS_PER_FP_REG);
+ mvmode = float_mode_for_size (BITS_PER_WORD).require ();
+ }
+
+ /* Split reg-reg move. */
+ if (REG_P (op0) && REG_P (op1))
+ {
+ rdst = REGNO (op0);
+ rsrc = REGNO (op1);
+
+ if (!reg_overlap_mentioned_p (op0, op1)
+ || rdst < rsrc)
+ /* The fp regs will never overlap r-regs. However, this
+ procedure can also be used for r-reg to r-reg splits. */
+ for (i = 0; i < iregs; i++)
+ emit_move_insn (gen_rtx_REG (mvmode, rdst + i),
+ gen_rtx_REG (mvmode, rsrc + i));
+ else
+ for (i = 0; i < iregs; i++)
+ emit_move_insn (gen_rtx_REG (mvmode, rdst + iregs - i - 1),
+ gen_rtx_REG (mvmode, rsrc + iregs - i - 1));
+ return;
+ }
+
+ /* Split mem-reg moves. */
+ gcc_assert (REG_P (op0) || REG_P (op1));
+
+ if (REG_P (op1))
+ {
+ src = op1;
+ dst = op0;
+ }
+ else
+ {
+ src = op0;
+ dst = op1;
+ }
+
+ lo = gen_lowpart (mvmode, src);
+ hi = gen_highpart_mode (mvmode, mode, src);
+
+ if (auto_inc_p (XEXP (dst, 0)))
+ {
+ rtx offset, reg, next, addr = XEXP (dst, 0);
+ enum rtx_code code = GET_CODE (addr);
+
+ switch (code)
+ {
+ case PRE_INC:
+ offset = GEN_INT (GET_MODE_SIZE (mode));
+ code = PRE_MODIFY;
+ break;
+ case PRE_DEC:
+ offset = GEN_INT (-GET_MODE_SIZE (mode));
+ code = PRE_MODIFY;
+ break;
+ case POST_MODIFY:
+ case PRE_MODIFY:
+ offset = XEXP (XEXP (addr, 1), 1);
+ break;
+ case POST_INC:
+ offset = GEN_INT (GET_MODE_SIZE (mode));
+ code = POST_MODIFY;
+ break;
+ case POST_DEC:
+ offset = GEN_INT (-GET_MODE_SIZE (mode));
+ code = POST_MODIFY;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ reg = XEXP (addr, 0);
+ next = gen_rtx_fmt_ee (code, Pmode, reg,
+ gen_rtx_PLUS (Pmode, reg, offset));
+
+ switch (code)
+ {
+ case POST_MODIFY:
+ /* We need to swap lo/hi order such that we emit first the
+ hi-load with an offset, and last the post modify
+ instruction. Thus the code can handle any type of auto
+ increment address. */
+ mem_lo = adjust_automodify_address (dst, mvmode, next, 0);
+ next = plus_constant (Pmode, reg, GET_MODE_SIZE (mvmode));
+ mem_hi = adjust_automodify_address (dst, mvmode, next,
+ GET_MODE_SIZE (mvmode));
+ swap_p = true;
+ break;
+ case PRE_MODIFY:
+ mem_lo = adjust_automodify_address (dst, mvmode, next, 0);
+ next = plus_constant (Pmode, reg, GET_MODE_SIZE (mvmode));
+ mem_hi = adjust_automodify_address (dst, mvmode, next,
+ GET_MODE_SIZE (mvmode));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (GET_CODE (XEXP (dst, 0)) == UNSPEC)
+ {
+ /* For rare situations when we need to split a PIC address. */
+ rtx addr = XEXP (dst, 0);
+ switch (XINT (addr, 1))
+ {
+ case ARC64_UNSPEC_PCREL:
+ addr = XVECEXP (addr, 0, 0);
+ addr = plus_constant (Pmode, addr, GET_MODE_SIZE (mvmode));
+ addr = gen_sym_unspec (addr, ARC64_UNSPEC_PCREL);
+ break;
+
+ default:
+ /* Fail for anything else. */
+ gcc_unreachable ();
+ }
+
+ mem_lo = adjust_address (dst, mvmode, 0);
+ mem_hi = adjust_automodify_address (mem_lo, GET_MODE (mem_lo),
+ addr, GET_MODE_SIZE (mvmode));
+ }
+ else
+ {
+ mem_lo = adjust_address (dst, mvmode, 0);
+ mem_hi = arc64_move_pointer (mem_lo, GET_MODE_SIZE (mvmode));
+ /* Catching scenarios like:
+ ld r0, [r0, 4] (ld lo, [mem_lo])
+ ld r1, [r0, 8] (ld hi, [mem_hi])
+
+ And setting the trigger (swap_p) to convert them to:
+ ld r1, [r0, 8]
+ ld r0, [r0, 4] */
+ if (reg_overlap_mentioned_p (lo, mem_lo))
+ swap_p = true;
+ }
+
+ if (REG_P (op1))
+ {
+ if (!swap_p)
+ emit_move_insn (mem_lo, lo);
+ emit_move_insn (mem_hi, hi);
+ if (swap_p)
+ emit_move_insn (mem_lo, lo);
+ }
+ else
+ {
+ if (!swap_p)
+ emit_move_insn (lo, mem_lo);
+ emit_move_insn (hi, mem_hi);
+ if (swap_p)
+ emit_move_insn (lo, mem_lo);
+ }
+}
+
+/* What mode to use when copying N bits of data.
+
+   HS5x
+   n >= 64: copy_mode()
+   n >= 32: SFmode if FP_MOVE
+            SImode otherwise
+   n >= 16: HFmode if FP_MOVE
+            HImode otherwise
+   n >=  8: QImode
+
+   HS6x
+   n >= 128: copy_mode()
+   n >=  64: DFmode if FP_MOVE
+             DImode otherwise
+   n >=  32: SFmode if FP_MOVE
+             SImode otherwise
+   n >=  16: HFmode if FP_MOVE
+             HImode otherwise
+   n >=   8: QImode
+
+   Note about the "return ((machine_mode) (FP ? Fmode : Imode))":
+   GCC 8.3 warns about an "int to machine_mode" conversion if we do
+   not use the explicit "((machine_mode) ...)" cast, while it is
+   perfectly happy with a plain "return [F|I]mode;".  */
+
+static machine_mode
+cpymem_copy_mode_for_n (int n)
+{
+ /* HS6x. */
+ if (TARGET_64BIT)
+ {
+ if (n >= 128)
+ return cpymem_copy_mode ();
+ else if (n >= 64)
+ return ((machine_mode) (TARGET_FP_MOVE ? DFmode : DImode));
+ /* fall-thru. */
+ }
+ /* HS5x. */
+ else
+ {
+ if (n >= 64)
+ return cpymem_copy_mode ();
+ /* fall-thru. */
+ }
+
+ if (n >= 32)
+ return ((machine_mode) (TARGET_FP_MOVE ? SFmode : SImode));
+ else if (n >= 16)
+ return ((machine_mode) (TARGET_FP_MOVE ? HFmode : HImode));
+ else
+ return QImode;
+}
+
+/* Returns the bit size (of a mode) that is big enough to
+ handle the remaining N bits of data.
+
+ This function is not expected to be called for Ns that
+ are too big for the architecture to swallow. e.g. for
+ an HS5x target without 64-bit load/store support, any
+ N > 32 is not expected. */
+
+static int
+cpymem_smallest_bigger_mode_bitsize (int n)
+{
+ if (n <= 8)
+ return 8; /* QImode. */
+ else if (n <= 16)
+ return 16; /* H{I|F}mode. */
+ else if (n <= 32)
+ return 32; /* S{I|F}mode. */
+ else if (n <= 64)
+ {
+ /* a 64-bit arch or a 32-bit arch with double load/stores. */
+ if (TARGET_64BIT || TARGET_LL64)
+ return 64; /* {DI|DF|V2SF}mode. */
+
+ /* This function should not have been called. */
+ gcc_unreachable ();
+ }
+ else if (n <= 128)
+ {
+ if (TARGET_64BIT && TARGET_WIDE_LDST)
+ return 128; /* {TI|V2DF}mode. */
+ /* Fall-thru. */
+ }
+
+ gcc_unreachable ();
+}
+
+/* Expand cpymem, as if from a __builtin_memcpy. Return true if
+ we succeed, otherwise return false. */
+
+bool
+arc64_expand_cpymem (rtx *operands)
+{
+ int n, mode_bits;
+ rtx dst = operands[0];
+ rtx src = operands[1];
+ rtx base;
+ machine_mode cur_mode;
+ bool speed_p = !optimize_function_for_size_p (cfun);
+
+ /* When optimizing for size, give a better estimate of the length of a
+ memcpy call, but use the default otherwise. Moves larger than 8 bytes
+ will always require an even number of instructions. Each operation
+ requires both a load and a store, so divide the max number by 2. */
+ int max_num_moves = (speed_p ? 16 : ARC64_CALL_RATIO) / 2;
+ /* In case of double moves, double the threshold. */
+ if (DOUBLE_LOAD_STORE)
+ max_num_moves *= 2;
+
+ /* We can't do anything smart if the amount to copy is not constant. */
+ if (!CONST_INT_P (operands[2]))
+ return false;
+
+ n = INTVAL (operands[2]);
+
+ /* Try to keep the number of instructions low. For all cases we will do at
+ most two moves for the residual amount, since we'll always overlap the
+ remainder. */
+ const int divisor = GET_MODE_SIZE (cpymem_copy_mode ());
+ if (((n / divisor) + (n % divisor ? 2 : 0)) > max_num_moves)
+ return false;
+
+ base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+ base = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+ /* Convert n to bits to make the rest of the code simpler. */
+ n = n * BITS_PER_UNIT;
+
+ while (n > 0)
+ {
+ cur_mode = cpymem_copy_mode_for_n (n);
+
+ mode_bits = GET_MODE_BITSIZE (cur_mode);
+ arc64_copy_one_block_and_progress_pointers (&src, &dst, cur_mode);
+
+ n -= mode_bits;
+
+ /* Do certain trailing copies as overlapping if it's going to be
+ cheaper, i.e. fewer instructions. For instance, for a 15-byte copy
+ it is more efficient to do two overlapping 8-byte copies than
+ 8 + 4 + 2 + 1. */
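+ /* E.g. (assuming a 64-bit target using integer moves): for a
+ 15-byte copy the first iteration copies 8 bytes, leaving n == 56
+ bits; 56 is rounded up to 64 and both pointers are moved back one
+ byte, so the second iteration copies the final 8 bytes, overlapping
+ the first copy by one byte. */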
+ if (n > 0 && n < (BITS_PER_UNIT * divisor))
+ {
+ int n_bits = cpymem_smallest_bigger_mode_bitsize (n);
+ src = arc64_move_pointer (src, (n - n_bits) / BITS_PER_UNIT);
+ dst = arc64_move_pointer (dst, (n - n_bits) / BITS_PER_UNIT);
+ n = n_bits;
+ }
+ }
+
+ return true;
+}
+
+/* Provide a mapping from gcc register numbers to dwarf register numbers. */
+unsigned
+arc64_dbx_register_number (unsigned regno)
+{
+ if (GP_REGNUM_P (regno))
+ return regno;
+ else if (FP_REGNUM_P (regno))
+ return 128 + regno - F0_REGNUM;
+
+ /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
+ equivalent DWARF register. */
+ return DWARF_FRAME_REGISTERS;
+}
+
+#if 0
+/* Expand the fp vector shift-right pattern. Can handle SIMD vectors
+ of at most 128 bits.
+
+ +----+----+----+----+----+----+----+----+
+ | h7 | h6 | h5 | h4 | h3 | h2 | h1 | h0 |
+ | s3 | s2 | s1 | s0 |
+ | d1 | d0 |
+ +----+----+----+----+----+----+----+----+
+
+ */
+
+bool
+arc64_expand_fvect_shr (rtx *operands)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx t0;
+ machine_mode mode = GET_MODE (op0);
+ scalar_int_mode imode = int_mode_for_mode (mode).require ();
+ unsigned int ival = INTVAL (op2);
+
+ if (ARC64_VFP_128 && (ival == 64))
+ {
+ emit_move_insn (gen_lowpart (DFmode, op0), gen_highpart (DFmode, op1));
+ return true;
+ }
+ else if (ARC64_VFP_64 && (ival == 32))
+ {
+ t0 = gen_reg_rtx (SFmode);
+
+ emit_insn (gen_vec_extractv2sfsf (t0,
+ gen_lowpart (V2SFmode, op1),
+ GEN_INT (1)));
+ emit_insn (gen_vec_setv2sf (gen_lowpart (V2SFmode, op0),
+ t0, GEN_INT (0)));
+ return true;
+ }
+ else if (ARC64_VFP_32 && (ival == 16))
+ {
+ t0 = gen_reg_rtx (HFmode);
+
+ emit_insn (gen_vec_extractv2hfhf (t0, op1, GEN_INT (1)));
+ emit_insn (gen_vec_setv2hf (op0, t0, GEN_INT (0)));
+ return true;
+ }
+
+ t0 = gen_reg_rtx (imode);
+ rtx shift = expand_binop (imode, lshr_optab,
+ gen_lowpart (imode, op1), op2,
+ NULL_RTX, true, OPTAB_DIRECT);
+ emit_move_insn (t0, shift);
+ emit_move_insn (op0, gen_lowpart (mode, t0));
+ return true;
+}
+#endif
+
+/* Return TRUE if SYM requires a PLT34 reloc. The instruction is
+ valid, hence any symbol which its type is LPIC is valid for
+ instruction, see arc64_is_long_call_p. */
+
+bool
+arc64_use_plt34_p (rtx sym)
+{
+ return (arc64_get_symbol_type (sym) == ARC64_LPIC);
+}
+
+/* Determine if it's legal to put X into the constant pool. It is
+ never OK to put a symbol in a constant pool. We arrive here in the
+ case of a TLS symbol which needs to be precomputed. We force this
+ in legitimize_constant_p. */
+
+static bool
+arc64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ return contains_symbol_ref_p (x) || tls_referenced_p (x);
+}
+
+/* Generate RTL for conditional branch with rtx comparison CODE in mode
+ CC_MODE. */
+
+void
+arc64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
+ rtx label_ref)
+{
+ rtx x;
+ x = gen_rtx_fmt_ee (code, VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+ const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (VOIDmode, label_ref),
+ pc_rtx);
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+/* True if the dependency between OUT_INSN and IN_INSN is on the accumulator
+ register. IN_INSN is a mac type of instruction. */
+
+int
+accumulator_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
+{
+ rtx in_set = single_set (in_insn);
+ rtx out_set = single_set (out_insn);
+
+ if (!in_set || !out_set)
+ return false;
+
+ if (!REG_P (SET_DEST (out_set)) || (REGNO (SET_DEST (out_set)) != R58_REGNUM))
+ return false;
+
+ rtx tmp = SET_SRC (in_set);
+ if (GET_CODE (tmp) == PLUS && GET_CODE (XEXP (tmp, 0)) == MULT)
+ return true;
+ return true;
+}
+
+/* True if IN_INSN is setting the accumulator. */
+
+int
+set_accumulator_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
+ rtx_insn *in_insn)
+{
+ rtx in_set = single_set (in_insn);
+ if (!in_set)
+ return false;
+
+ if (REG_P (SET_DEST (in_set)) && (REGNO (SET_DEST (in_set)) == R58_REGNUM))
+ return true;
+ return false;
+}
+
+/* Return 'return' instruction. */
+
+const char *
+arc64_output_return (void)
+{
+ if (ARC_NAKED_P (cfun->machine->fn_type))
+ return "";
+
+ return "j_s%*\t[blink]";
+}
+
+/* Return nonzero if register FROM_REGNO can be renamed to register
+ TO_REGNO. */
+
+bool
+arc64_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
+ unsigned to_regno)
+{
+ /* Interrupt functions can only use registers that have already been saved by
+ the prologue, even if they would normally be call-clobbered. */
+ return (!ARC_INTERRUPT_P (cfun->machine->fn_type)
+ || df_regs_ever_live_p (to_regno));
+}
+
+/* Emit the RTX necessary to initialize the vector TARGET with values in
+ VALS. */
+
+void
+arc64_expand_vector_init (rtx target, rtx vals)
+{
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int i;
+ rtx elem[4], tmp[2];
+
+ gcc_assert (n_elts <= 4);
+ for (i = 0; i < n_elts; i++)
+ {
+ elem[i] = XVECEXP (vals, 0, i);
+ if (!register_operand (elem[i], GET_MODE (elem[i])))
+ elem[i] = force_reg (inner_mode, elem[i]);
+ }
+
+ switch (mode)
+ {
+ case V4HImode:
+ tmp[0] = gen_reg_rtx (mode);
+ tmp[1] = gen_reg_rtx (mode);
+ emit_insn (gen_arc64_vpack_v4hihi (tmp[0], elem[0], elem[1]));
+ emit_insn (gen_arc64_vpack_v4hihi (tmp[1], elem[2], elem[3]));
+ emit_insn (gen_arc64_sel_lane2_0v4hi (target, tmp[0], tmp[1]));
+ break;
+
+ case V2SImode:
+ emit_insn (gen_arc64_vpack_v2sisi (target, elem[0], elem[1]));
+ break;
+
+ case V2HImode:
+ emit_insn (gen_arc64_vpack_v2hihi (target, elem[0], elem[1]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Target hooks. */
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
+ hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK arc64_output_mi_thunk
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE arc64_can_eliminate
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED arc64_frame_pointer_required
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P arc64_legitimate_address_p
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P arc64_legitimate_constant_p
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY arc64_return_in_memory
+
+/* Passing arguments. */
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE arc64_pass_by_reference
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS arc64_setup_incoming_varargs
+
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arc64_function_value
+
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P arc64_function_value_regno_p
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG arc64_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE arc64_function_arg_advance
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES arc64_arg_partial_bytes
+
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+
+#undef TARGET_COMPUTE_FRAME_LAYOUT
+#define TARGET_COMPUTE_FRAME_LAYOUT arc64_compute_frame_info
+
+#undef TARGET_HARD_REGNO_NREGS
+#define TARGET_HARD_REGNO_NREGS arc64_hard_regno_nregs
+
+#undef TARGET_HARD_REGNO_MODE_OK
+#define TARGET_HARD_REGNO_MODE_OK arc64_hard_regno_mode_ok
+
+#undef TARGET_MODES_TIEABLE_P
+#define TARGET_MODES_TIEABLE_P arc64_modes_tieable_p
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND arc64_print_operand
+
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS arc64_print_operand_address
+
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arc64_print_operand_punct_valid_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arc64_initialize_trampoline
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc64_asm_trampoline_template
+
+#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
+#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL arc64_function_ok_for_sibcall
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS arc64_init_libfuncs
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE arc64_output_function_prologue
+
+#undef TARGET_CONSTANT_ALIGNMENT
+#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE \
+ default_promote_function_mode_always_promote
+
+/* To be checked if it is better without it. */
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arc64_output_addr_const_extra
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS arc64_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN arc64_expand_builtin
+
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL arc64_builtin_decl
+
+/* When TLS is supported, R30 is made a fixed register as well. */
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+#endif
+
+#undef TARGET_LRA_P
+#define TARGET_LRA_P hook_bool_void_true
+
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST arc64_insn_cost
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG arc64_reorg
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE arc64_conditional_register_usage
+
+#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
+#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
+arc64_libgcc_floating_mode_supported_p
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P arc64_scalar_mode_supported_p
+
+#undef TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG arc64_split_complex_arg
+
+/* Vectors. */
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P arc64_vector_mode_supported_p
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc64_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
+ arc64_autovectorize_vector_modes
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ arc64_builtin_vectorization_cost
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST arc64_register_move_cost
+
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST arc64_vectorize_vec_perm_const
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS arc64_rtx_costs_wrapper
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST arc64_address_cost
+
+/* Scheduling. */
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P arc64_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P arc64_macro_fusion_pair_p
+
+/* Disable speculation when filling delay slots. In general we get
+ better (speed) results, but not for EEMBC's text01 benchmark.
+ Disabling delay-slot-filler speculation is needed to preserve the
+ loop body size as calculated in the machine reorg phase. For more
+ info see github issue #416. */
+#undef TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
+#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P hook_bool_void_true
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM arc64_cannot_force_const_mem
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE arc64_override_options
+
+/* CC regs optimizations. */
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS arc64_fixed_condition_code_regs
+
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM CC_REGNUM
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION arc64_set_current_function
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE arc64_attribute_table
+
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arc64_allocate_stack_slots_for_args
+
+#undef TARGET_WARN_FUNC_RETURN
+#define TARGET_WARN_FUNC_RETURN arc64_warn_func_return
+
+#undef TARGET_CAN_FOLLOW_JUMP
+#define TARGET_CAN_FOLLOW_JUMP arc64_can_follow_jump
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE arc64_sched_issue_rate
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-arc64.h"
diff --git a/gcc/config/arc64/arc64.h b/gcc/config/arc64/arc64.h
new file mode 100644
index 0000000000000..e95bf2037251a
--- /dev/null
+++ b/gcc/config/arc64/arc64.h
@@ -0,0 +1,736 @@
+/* Machine description for ARC64 architecture.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_ARC64_H
+#define GCC_ARC64_H
+
+/* Bits are always numbered from the LSBit. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Is the 64bit or 32bit variant of the CPU used? */
+#define TARGET_64BIT arc64_target_64bit
+
+/* Determine TARGET_ARCH64 in all possible cases. */
+#ifdef IN_LIBGCC2
+#if defined(__ARC64_ARCH64__)
+#define TARGET_ARCH64 1
+#else
+#define TARGET_ARCH64 0
+#endif
+#else /* not IN_LIBGCC2 */
+#define TARGET_ARCH64 TARGET_64BIT
+#endif
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_ARCH64 ? 8 : 4)
+#ifndef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Width of a fp register, in bytes. */
+#define UNITS_PER_FP_REG ((arc64_fp_model == 2) ? 8 : 4)
+
+/* Maximum number of registers that can appear in a valid memory
+ address. N.B. The ld insn allows 2, but the st insn only allows
+ 1. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Addressing modes. */
+#define HAVE_PRE_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_PRE_MODIFY_DISP 1
+#define HAVE_POST_MODIFY_DISP 1
+#define HAVE_PRE_MODIFY_REG 1
+#define HAVE_POST_MODIFY_REG 1
+
+/* The number of registers used for parameter passing. Local to this
+ file. */
+#define MAX_ARC64_PARM_REGS 8
+
+/* 1 if N is a possible register number for function argument
+ passing. */
+/* Hard floats: r0-r7, and f0-f7. */
+#define FUNCTION_ARG_REGNO_P(N) \
+ (IN_RANGE ((N), R0_REGNUM, R7_REGNUM) \
+ || (ARC64_HAS_FP_BASE && IN_RANGE ((N), F0_REGNUM, F7_REGNUM)))
+
+/* Boundaries. */
+#define PARM_BOUNDARY BITS_PER_WORD
+#define STACK_BOUNDARY POINTER_SIZE
+#define FUNCTION_BOUNDARY 32
+#define EMPTY_FIELD_BOUNDARY 32
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* Look at the fundamental type that is used for a bit-field and use
+ that to impose alignment on the enclosing structure. struct s {int
+ a:8}; should have the same alignment as "int", not "char". */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Alignments. */
+#define FASTEST_ALIGNMENT BITS_PER_WORD
+/* pr64242.c is one interesting test where changing BIGGEST_ALIGNMENT
+ triggers errors. */
+#define BIGGEST_ALIGNMENT BITS_PER_WORD
+#define ARC64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \
+ (((COND) && ((ALIGN) < FASTEST_ALIGNMENT) \
+ && (TREE_CODE (EXP) == ARRAY_TYPE)) ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Align global data. */
+#define DATA_ALIGNMENT(EXP, ALIGN) \
+ ARC64_EXPAND_ALIGNMENT (!optimize_size, EXP, ALIGN)
+
+/* Similarly, make sure that objects on the stack are sensibly
+ aligned. */
+#define LOCAL_ALIGNMENT(EXP, ALIGN) \
+ ARC64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT (!unaligned_access)
+
+/* Default unaligned accesses. */
+#ifndef UNALIGNED_ACCESS_DEFAULT
+#define UNALIGNED_ACCESS_DEFAULT 0
+#endif
+
+/* Layout of Source Language Data Types. */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define POINTER_SIZE (TARGET_ARCH64 ? 64 : 32)
+#define LONG_TYPE_SIZE POINTER_SIZE
+
+/* Defined for convenience. */
+#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Defined by ABI. */
+#define WCHAR_TYPE "int"
+#define WCHAR_TYPE_SIZE 32
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+
+/* Specify the machine mode that the hardware addresses have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+
+#define Pmode word_mode
+
+/* Mode of a function address in a call instruction (for indexing purposes). */
+#define FUNCTION_MODE Pmode
+
+#ifdef HAVE_AS_TLS
+#define ARC64_TLS_REGNO 1
+#else
+#define ARC64_TLS_REGNO 0
+#endif
+
+/* Register usage:
+ R0-R3 Parameter/result registers
+ R4-R7 Parameter registers
+ R8-R13 Temporary registers
+ R14-R26 Callee-saved registers
+ R27 FP (frame pointer)
+ R28 SP (stack pointer)
+ R29 ILINK (Interrupt link register)
+ R30 GP/TP Global pointer; also used as the thread pointer,
+ otherwise available as a temporary register.
+ R31 BLINK (return register)
+ R32-R57 Extension registers
+ R58 ACC (accumulator)
+ R59 Reserved
+ --- Special registers ---
+ R60 sign-extended 32-bit indicator
+ R61 Reserved
+ R62 zero extended 32-bit immediate indicator
+ R63 PCL (program counter)
+ --- Floating point registers ---
+ F0 Parameter/result register
+ F1-F7 Parameter registers
+ F8-F13 Temporary registers
+ F14-F31 Callee-saved registers
+ -- Fake registers --
+ AP Argument pointer
+ SFP Soft frame pointer
+ CC Status register.
+ */
+
+/* 1 for registers that are not available for the register
+ allocator. */
+#define FIXED_REGISTERS \
+ { \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R0 - R7 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R8 - R15 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \
+ 0, 0, 0, 0, 1, 1, ARC64_TLS_REGNO, 1, /* R24 - R26, FP, SP, ILINK, R30, BLINK */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R32 - R39 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R40 - R47 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R48 - R55 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R56, R57, ACCL, R59, Specials */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - F7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F8 - F15 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F16 - F23 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F24 - F31 */ \
+ \
+ 1, 1, 1, /* AP, SFP, CC */ \
+ }
+
+/* 1 for registers not available across function calls. */
+#define CALL_USED_REGISTERS \
+ { \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \
+ 1, 1, 1, 1, 1, 1, 0, 0, /* R8 - R15 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \
+ 0, 0, 0, 0, 1, 1, 1, 1, /* R24 - R26, FP, SP, ILINK, R30, BLINK */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R32 - R39 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R40 - R47 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R48 - R55 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* R56, R57, ACCL, R59, Specials */ \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - F7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F8 - F15 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F16 - F23 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* F24 - F31 */ \
+ \
+ 1, 1, 1, /* AP, SFP, CC */ \
+ }
+
+#define REGISTER_NAMES \
+ { \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "sp", "ilink", "r30", "blink", \
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
+ "r56", "r57", "r58", "r59", "ximm", "rez", "limm", "pcl", \
+ \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
+ "ap", "sfp", "cc", \
+ }
+
+#define ADDITIONAL_REGISTER_NAMES \
+ { \
+ { "fp", 27 }, \
+ { "gp", 30 }, \
+ { "acc", 58 }, \
+ }
+
+#define EPILOGUE_USES(REGNO) (arc64_epilogue_uses (REGNO))
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. This is only true if the function
+ uses alloca. */
+#define EXIT_IGNORE_STACK (cfun->calls_alloca)
+
+#define STATIC_CHAIN_REGNUM R11_REGNUM
+#define HARD_FRAME_POINTER_REGNUM R27_REGNUM
+#define FRAME_POINTER_REGNUM SFP_REGNUM
+#define STACK_POINTER_REGNUM SP_REGNUM
+#define ARG_POINTER_REGNUM AP_REGNUM
+#define FIRST_PSEUDO_REGISTER (CC_REGNUM + 1)
+
+enum reg_class
+{
+ NO_REGS,
+ AC16_REGS,
+ SIBCALL_REGS,
+ CORE_REGS,
+ GENERAL_REGS,
+ FP_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "AC16_REGS", \
+ "SIBCALL_REGS", \
+ "CORE_REGS", \
+ "GENERAL_REGS", \
+ "FP_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x0000f00f, 0x00000000, 0x00000000, 0x00000000 }, /* AC16_REGS */ \
+ { 0x00001fff, 0x00000000, 0x00000000, 0x00000000 }, /* SIBCALL_REGS */ \
+ { 0x0000ffff, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
+ { 0xdfffffff, 0x0fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0x00000007 }, /* ALL_REGS */ \
+}
+
+/* A C expression whose value is a register class containing hard
+ register REGNO. In general there is more that one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+
+#define REGNO_REG_CLASS(REGNO) arc64_regno_to_regclass[ (REGNO) ]
+
+/* A C expression that is nonzero if it is OK to rename a hard register FROM to
+ another hard register TO. */
+
+#define HARD_REGNO_RENAME_OK(FROM, TO) arc64_hard_regno_rename_ok (FROM, TO)
+
+/* The class value for valid base registers. A base register is one used in
+ an address which is the register value plus a displacement. */
+
+#define BASE_REG_CLASS GENERAL_REGS
+#define INDEX_REG_CLASS GENERAL_REGS
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ We have two registers that can be eliminated on the ARC. First, the
+ argument pointer register can always be eliminated in favor of the stack
+ pointer register or frame pointer register. Secondly, the frame pointer
+ register can often be eliminated in favor of the stack pointer register.
+*/
+
+#define ELIMINABLE_REGS \
+ { \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM } \
+ }
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = arc64_initial_elimination_offset (FROM, TO)
+
+/* RTL generation support. */
+#define INIT_EXPANDERS arc64_init_expanders ()
+
+/* Stack layout; function entry, exit and calling. */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Addresses of local variables slots are at negative offsets from the
+ frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* If defined, the maximum amount of space required for outgoing
+ arguments will be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed onto the stack
+ for each call; instead, the function prologue should increase the
+ stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset of first parameter from the argument pointer register
+ value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (MODE, arc64_use_fp_regs (MODE) ? F0_REGNUM : R0_REGNUM)
+
+/* Tell GCC to use RETURN_IN_MEMORY. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* To be checked: WORD_REGISTER_OPERATIONS; ARC64 has 32-bit
+ operations. */
+#define WORD_REGISTER_OPERATIONS 1
+
+/* Define if loading from memory in MODE, an integral mode narrower than
+ BITS_PER_WORD will either zero-extend or sign-extend. The value of this
+ macro should be the code that says which one of the two operations is
+ implicitly done, or UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) (((MODE) == SImode) ? SIGN_EXTEND : ZERO_EXTEND)
+
+/* Enable wide bitfield accesses for more efficient bitfield code. */
+#define SLOW_BYTE_ACCESS 1
+
+#define NO_FUNCTION_CSE 1
+
+/* Conditional info. */
+#define SELECT_CC_MODE(OP, X, Y) arc64_select_cc_mode (OP, X, Y)
+
+/* Restrictions apply to floating-point comparisons. */
+#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CC_FPUmode && (MODE) != CC_FPUEmode)
+
+/* Returning. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, BLINK_REGNUM)
+
+#define RETURN_ADDR_RTX arc64_return_addr
+
+/* Define this to be nonzero if shift instructions ignore all but the
+ low-order few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Defines if the CLZ result is undefined or has a useful value. */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_SIZE((MODE)) * BITS_PER_UNIT - 1, 2)
+
+/* Defines if the CTZ result is undefined or has a useful value. */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_SIZE((MODE)) * BITS_PER_UNIT - 1, 2)
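+
+/* I.e. the defined value at zero is 31 for SImode and 63 for DImode;
+   the trailing 2 indicates the value holds for the RTL expression as
+   well as for the corresponding optab. */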
+
+/* Function argument passing. */
+
+/* Define a data type for recording info about an argument list during
+ the scan of that argument list. This data type should hold all
+ necessary information about the function itself and about the args
+ processed so far, enough to enable macros such as FUNCTION_ARG to
+ determine where the next arg should go. */
+#define CUMULATIVE_ARGS struct arc64_args
+struct arc64_args
+{
+ /* Number of integer registers used so far. */
+ int iregs;
+
+ /* Number of floating-point registers used so far. */
+ int fregs;
+};
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
+ ((CUM).iregs = 0, (CUM).fregs = 0)
+
+/* An integer expression for the size in bits of the largest integer machine
+ mode that should actually be used. We allow pairs of registers. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_ARCH64 ? TImode : DImode)
+
+/* Maximum bytes moved by a single instruction (load/store pair). */
+#define MOVE_MAX (2*UNITS_PER_WORD)
+#define MAX_MOVE_MAX 16
+
+/* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */
+#define ARC64_CALL_RATIO 8
+
+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure.
+ move_by_pieces will continually copy the largest safe chunks. So a
+ 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient
+ for both size and speed of copy, so we will instead use the "cpymem"
+ standard name to implement the copy. This logic does not apply when
+ targeting -mstrict-align, so keep a sensible default in that case. */
+#define MOVE_RATIO(speed) \
+ (!STRICT_ALIGNMENT ? 2 : ((speed) ? 15 : ARC64_CALL_RATIO))
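+/* Illustration: without -mstrict-align MOVE_RATIO is 2, so any copy
+   needing more than two pieces is routed to the "cpymem" expander;
+   with -mstrict-align, move_by_pieces may emit up to 15 moves when
+   optimizing for speed and up to ARC64_CALL_RATIO moves when
+   optimizing for size before a library call is preferred.  */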
+
+#ifndef USED_FOR_TARGET
+extern const enum reg_class arc64_regno_to_regclass[];
+#endif
+
+#define SIGNED(X,V) \
+ ((unsigned long long) ((X) + (1ULL << (V - 1))) < (1ULL << V))
+#define UNSIGNED(X,V) ((unsigned long long) (X) < (1ULL << V))
+#define VERIFY_SHIFT(X,S) ((X & ((1 << S) - 1)) == 0)
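+/* Examples: SIGNED (x, 12) accepts -2048..2047, UNSIGNED (x, 6)
+   accepts 0..63, and VERIFY_SHIFT (x, 2) requires the two least
+   significant bits of X to be clear.  */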
+
+#define UNSIGNED_INT3(X) (UNSIGNED(X,3))
+#define UNSIGNED_INT5(X) (UNSIGNED(X,5))
+#define UNSIGNED_INT6(X) (UNSIGNED(X,6))
+#define UNSIGNED_INT7(X) (UNSIGNED(X,7))
+#define UNSIGNED_INT8(X) (UNSIGNED(X,8))
+#define UNSIGNED_INT9(X) (UNSIGNED(X,9))
+#define UNSIGNED_INT10(X) (UNSIGNED(X,10))
+#define UNSIGNED_INT12(X) (UNSIGNED(X,12))
+#define UNSIGNED_INT16(X) (UNSIGNED(X,16))
+// TODO: Fix for 32 bit compiler host architecture.
+#define UNSIGNED_INT32(X) (UNSIGNED(X,32))
+
+#define SIGNED_INT3(X) (SIGNED(X,3))
+#define SIGNED_INT6(X) (SIGNED(X,6))
+#define SIGNED_INT7(X) (SIGNED(X,7))
+#define SIGNED_INT8(X) (SIGNED(X,8))
+#define SIGNED_INT9(X) (SIGNED(X,9))
+#define SIGNED_INT10(X) (SIGNED(X,10))
+#define SIGNED_INT11(X) (SIGNED(X,11))
+#define SIGNED_INT12(X) (SIGNED(X,12))
+#define SIGNED_INT13(X) (SIGNED(X,13))
+#define SIGNED_INT16(X) (SIGNED(X,16))
+#define SIGNED_INT21(X) (SIGNED(X,21))
+#define SIGNED_INT25(X) (SIGNED(X,25))
+
+// TODO: Fix for 32 bit compiler host architecture.
+#define SIGNED_INT32(X) (SIGNED(X,32))
+
+#define UNSIGNED_INT7_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S))
+#define UNSIGNED_INT8_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S))
+#define UNSIGNED_INT9_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && UNSIGNED_INT6(X >> S))
+
+#define SIGNED_INT13_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S))
+#define SIGNED_INT14_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S))
+#define SIGNED_INT15_SHIFTED(X,S) (VERIFY_SHIFT(X,S) && SIGNED_INT12(X >> S))
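+/* The shifted variants describe scaled immediate fields: the value
+   must be aligned to 1 << S and fit the base field once shifted right
+   by S.  For example, UNSIGNED_INT9_SHIFTED (x, 3) accepts multiples
+   of 8 in the range 0..504, i.e. a u6 field scaled by 8.  */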
+
+#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X))
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GP_REGNUM_P (REGNO) \
+ || ((REGNO) == AP_REGNUM) \
+ || ((REGNO) == SFP_REGNUM))
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO)
+
+/* Return true if regno is FP register. */
+#define FP_REGNUM_P(REGNO) \
+ (((unsigned) (REGNO - F0_REGNUM)) <= (F31_REGNUM - F0_REGNUM))
+
+#define GP_REGNUM_P(REGNO) \
+ (((unsigned) (REGNO - R0_REGNUM)) <= (BLINK_REGNUM - R0_REGNUM))
+
+/* Trampolines, used for entering nested functions, are a block of code
+ followed by two pointers. The sizes here are in bytes. */
+#define TRAMPOLINE_CODE_SIZE \
+ ((Pmode == SImode) \
+ ? 8 /* ld_s, ld, j_s */ \
+ : 16) /* nop, ldl, ldl, j */
+#define TRAMPOLINE_SIZE (TRAMPOLINE_CODE_SIZE + 2 * POINTER_BYTES)
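+/* For example, with 64-bit pointers (Pmode == DImode) this amounts to
+   16 bytes of code plus two 8-byte pointers, i.e. 32 bytes.  */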
+/* Alignment required for a trampoline, in bits.  */
+#define TRAMPOLINE_ALIGNMENT POINTER_SIZE
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_CPU_CPP_BUILTINS() arc64_cpu_cpp_builtins (pfile)
+
+/* Dispatch tables. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+#define CASE_VECTOR_MODE SImode
+#define CASE_VECTOR_PC_RELATIVE 1
+#define ADDR_VEC_ALIGN(VEC_INSN) 0
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ { \
+ (MODE) = SImode; \
+ }
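+/* For example, a char or short scalar held in a register is widened to
+   SImode; SImode and wider modes are left as they are.  */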
+
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will
+ end at the end of the line. */
+#define ASM_COMMENT_START "#"
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ fprintf(FILE, "\t.align\t%d\n", 1 << (int)LOG)
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#undef ASM_APP_ON
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#undef ASM_APP_OFF
+#define ASM_APP_OFF ""
+
+/* This is how to output a reference to a symbol_ref / label_ref as
+ (part of) an operand. To disambiguate from register names like a1
+ / a2 / status etc, symbols are preceded by '@'. */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+ ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0))
+#define ASM_OUTPUT_LABEL_REF(FILE,STR) \
+ do \
+ { \
+ fputs ("@", (FILE)); \
+ assemble_name ((FILE), (STR)); \
+ } \
+ while (0)
+
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to output an element of a PIC case-vector. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ fprintf (STREAM, "\tb\t@%sL%d\n", \
+ LOCAL_LABEL_PREFIX, VALUE)
+
+/* elfos.h defines this to also emit an .align; we don't want that.  */
+#undef ASM_OUTPUT_CASE_LABEL
+
+/* Section selection. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+#define TEXT_SECTION_ASM_OP "\t.section\t.text"
+#define DATA_SECTION_ASM_OP "\t.section\t.data"
+
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#define SDATA_SECTION_ASM_OP "\t.section\t.sdata"
+#define SBSS_SECTION_ASM_OP "\t.section\t.sbss"
+
+/* Expression whose value is a string, including spacing, containing
+ the assembler operation to identify the following data as
+ initialization/termination code. If not defined, GCC will assume
+ such a section does not exist. */
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#define FINI_SECTION_ASM_OP "\t.section\t.fini"
+
+/* All the work is done in PROFILE_HOOK, but this definition is still
+   required.  */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+
+#define NO_PROFILE_COUNTERS 1
+
+/* Tell crtstuff.c we're using ELF. */
+#define OBJECT_FORMAT_ELF
+
+/* Called by crtstuff.c to emit, in the section given by SECTION_OP, a call
+   to function FUNCTION, and then to switch back to the text section. */
+#undef CRT_CALL_STATIC_FUNCTION
+#ifdef __ARC64_ARCH32__
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "mov\tr12,@" USER_LABEL_PREFIX #FUNC "\n\t" \
+ "jl\t[r12]\n" \
+ TEXT_SECTION_ASM_OP);
+#elif (defined __ARC64_ARCH64__)
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "addl\tr12,pcl,@" USER_LABEL_PREFIX #FUNC "@pcl\n\t" \
+ "jl\t[r12]\n" \
+ TEXT_SECTION_ASM_OP);
+#endif
+
+/* ATOMIC options. */
+/* FIXME: is 0 okay or should it be -1 like DEFAULT_arc_mpy_option? */
+/* Default atomic option value. */
+#undef DEFAULT_ARC64_ATOMIC_OPTION
+#define DEFAULT_ARC64_ATOMIC_OPTION 1
+
+#define ARC64_HAS_ATOMIC_1 (arc64_atomic_option > 0)
+#define ARC64_HAS_ATOMIC_2 (arc64_atomic_option > 1)
+#define ARC64_HAS_ATOMIC_3 (arc64_atomic_option > 2)
+
+/* DIVREM options. */
+#undef TARGET_ARC64_DIVREM_DEFAULT
+#define TARGET_ARC64_DIVREM_DEFAULT 1
+
+/* FP options. */
+#define ARC64_HAS_FP_BASE (arc64_fp_model > 0)
+#define ARC64_HAS_FPUH (arc64_fp_model > 0)
+#define ARC64_HAS_FPUS (arc64_fp_model > 0)
+#define ARC64_HAS_FPUD (arc64_fp_model > 1)
+
+#define TARGET_HARD_FLOAT ARC64_HAS_FP_BASE
+
+/* Vector SIMD length. */
+#define ARC64_VFP_32 (arc64_fp_model == 1)
+#define ARC64_VFP_64 ((arc64_fp_model == 2) && !TARGET_WIDE_SIMD)
+#define ARC64_VFP_128 ((arc64_fp_model == 2) && TARGET_WIDE_SIMD)
+
+/* IFCVT macros. */
+#define STORE_FLAG_VALUE 1
+#define MAX_CONDITIONAL_EXECUTE 12
+#define BRANCH_COST(speed_p, predictable_p) 10
+
+/* DWARF macros. */
+#define DWARF2_DEBUGGING_INFO 1
+/* The mapping from gcc register number to DWARF2 CFA column number. */
+#define DWARF_FRAME_REGNUM(REGNO) DBX_REGISTER_NUMBER(REGNO)
+/* DWARF2 CFA column which tracks the return address. */
+#define DWARF_FRAME_RETURN_COLUMN BLINK_REGNUM
+/* DWARF registers encodings. */
+#define DBX_REGISTER_NUMBER(REGNO) arc64_dbx_register_number (REGNO)
+/* The DWARF 2 CFA column which tracks the return address from a signal handler
+   context.  This value must not correspond to a hard register and must be out
+   of the range of DWARF_FRAME_REGNUM ().  The unwind-dw2.c file uses the
+   DWARF_REG_TO_UNWIND_COLUMN and DWARF_FRAME_REGISTERS macros.  The
+   DWARF_FRAME_REGNUM macro returns no equivalent DWARF register for
+   AP_REGNUM, so we should be safe using AP_REGNUM.  */
+#define DWARF_ALT_FRAME_RETURN_COLUMN AP_REGNUM
+
+/* Exception Handling support. */
+/* Use R0 through R3 to pass exception handling information. */
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX arc64_eh_return_handler_rtx ()
+#define EH_USES(REGNO) (arc64_eh_uses((REGNO)))
+
+/* Select a format to encode pointers in exception handling data. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ arc64_asm_preferred_eh_data_format ((CODE), (GLOBAL))
+
+/* Specs. */
+
+/* Support for a compile-time default CPU or FPU. */
+#define OPTION_DEFAULT_SPECS \
+ { "fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \
+ { "cpu", "%{!mcpu=*:-mcpu=%(VALUE)}"}
+
+#define CPP_SPEC "%(subtarget_cpp_spec)"
+
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{mcpu=*:-mcpu=%*}"
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#undef ARC64_SUBTARGET_DEFAULT
+#define ARC64_SUBTARGET_DEFAULT 0
+
+#endif /* GCC_ARC64_H */
diff --git a/gcc/config/arc64/arc64.md b/gcc/config/arc64/arc64.md
new file mode 100644
index 0000000000000..1eaee6fb05277
--- /dev/null
+++ b/gcc/config/arc64/arc64.md
@@ -0,0 +1,3238 @@
+;; Register numbers
+(define_constants
+ [
+ (R0_REGNUM 0)
+ (R1_REGNUM 1)
+ (R2_REGNUM 2)
+ (R3_REGNUM 3)
+ (R4_REGNUM 4)
+ (R5_REGNUM 5)
+ (R6_REGNUM 6)
+ (R7_REGNUM 7)
+ (R8_REGNUM 8)
+ (R9_REGNUM 9)
+ (R10_REGNUM 10)
+ (R11_REGNUM 11)
+ (R12_REGNUM 12)
+ (R13_REGNUM 13)
+ (R14_REGNUM 14)
+ (R15_REGNUM 15)
+ (R16_REGNUM 16)
+ (R17_REGNUM 17)
+ (R18_REGNUM 18)
+ (R19_REGNUM 19)
+ (R20_REGNUM 20)
+ (R21_REGNUM 21)
+ (R22_REGNUM 22)
+ (R23_REGNUM 23)
+ (R24_REGNUM 24)
+ (R25_REGNUM 25)
+ (R26_REGNUM 26)
+ (R27_REGNUM 27)
+ (SP_REGNUM 28)
+ (ILINK_REGNUM 29)
+ (R30_REGNUM 30)
+ (BLINK_REGNUM 31)
+ (R32_REGNUM 32)
+ (R33_REGNUM 33)
+ (R34_REGNUM 34)
+ (R35_REGNUM 35)
+ (R36_REGNUM 36)
+ (R37_REGNUM 37)
+ (R38_REGNUM 38)
+ (R39_REGNUM 39)
+ (R40_REGNUM 40)
+ (R41_REGNUM 41)
+ (R42_REGNUM 42)
+ (R43_REGNUM 43)
+ (R44_REGNUM 44)
+ (R45_REGNUM 45)
+ (R46_REGNUM 46)
+ (R47_REGNUM 47)
+ (R48_REGNUM 48)
+ (R49_REGNUM 49)
+ (R50_REGNUM 50)
+ (R51_REGNUM 51)
+ (R52_REGNUM 52)
+ (R53_REGNUM 53)
+ (R54_REGNUM 54)
+ (R55_REGNUM 55)
+ (R56_REGNUM 56)
+ (R57_REGNUM 57)
+ (R58_REGNUM 58)
+ (R59_REGNUM 59)
+
+ (R60_REGNUM 60)
+ (R61_REGNUM 61)
+ (R62_REGNUM 62)
+ (R63_REGNUM 63)
+
+ (F0_REGNUM 64)
+ (F1_REGNUM 65)
+ (F2_REGNUM 66)
+ (F3_REGNUM 67)
+ (F4_REGNUM 68)
+ (F5_REGNUM 69)
+ (F6_REGNUM 70)
+ (F7_REGNUM 71)
+ (F8_REGNUM 72)
+ (F9_REGNUM 73)
+ (F10_REGNUM 74)
+ (F11_REGNUM 75)
+ (F12_REGNUM 76)
+ (F13_REGNUM 77)
+ (F14_REGNUM 78)
+ (F15_REGNUM 79)
+ (F16_REGNUM 80)
+ (F17_REGNUM 81)
+ (F18_REGNUM 82)
+ (F19_REGNUM 83)
+ (F20_REGNUM 84)
+ (F21_REGNUM 85)
+ (F22_REGNUM 86)
+ (F23_REGNUM 87)
+ (F24_REGNUM 88)
+ (F25_REGNUM 89)
+ (F26_REGNUM 90)
+ (F27_REGNUM 91)
+ (F28_REGNUM 92)
+ (F29_REGNUM 93)
+ (F30_REGNUM 94)
+ (F31_REGNUM 95)
+
+ (AP_REGNUM 96)
+ (SFP_REGNUM 97)
+ (CC_REGNUM 98)
+ ]
+ )
+
+(define_c_enum "unspec"
+ [
+ ARC64_UNSPEC_PCREL
+ ARC64_UNSPEC_GOT
+ ARC64_UNSPEC_GOT32
+ ARC64_UNSPEC_TLS_GD
+ ARC64_UNSPEC_TLS_IE
+ ARC64_UNSPEC_TLS_OFF
+ ARC64_VUNSPEC_BLOCKAGE
+
+ ARC64_VUNSPEC_LR
+ ARC64_VUNSPEC_SR
+ ARC64_VUNSPEC_LRL
+ ARC64_VUNSPEC_SRL
+ ARC64_VUNSPEC_FLAG
+ ARC64_VUNSPEC_BRK
+ ARC64_VUNSPEC_NOP
+ ARC64_VUNSPEC_TRAP_S
+
+ ARC64_VUNSPEC_EX
+ ARC64_VUNSPEC_CAS
+ ARC64_VUNSPEC_SC
+ ARC64_VUNSPEC_LL
+ ARC64_VUNSPEC_SYNC
+ ARC64_VUNSPEC_ATOOPS
+ ARC64_VUNSPEC_RTIE
+
+ ARC64_UNSPEC_MEMBAR
+ ARC64_UNSPEC_FLS
+ ARC64_UNSPEC_COPYSIGN
+ ARC64_UNSPEC_XORSIGN
+ ARC64_UNSPEC_ROUND
+ ARC64_UNSPEC_BTRUNC
+ ARC64_UNSPEC_CASESI
+ ARC64_UNSPEC_VECINIT
+ ARC64_UNSPEC_QMPYH
+ ARC64_UNSPEC_QMACH
+ ARC64_UNSPEC_DMPYWH
+ ARC64_UNSPEC_DMPYWHU
+ ARC64_UNSPEC_DMACWH
+ ARC64_UNSPEC_DMACWHU
+ ARC64_UNSPEC_VPACK4HL
+ ARC64_UNSPEC_VPACK4HM
+ ARC64_UNSPEC_VPACK2WL
+ ARC64_UNSPEC_SWAPL
+ ARC64_UNSPEC_SWAP
+ ARC64_UNSPEC_VEC_SHR
+ ARC64_UNSPEC_VEC_SHL
+ ARC64_UNSPEC_HEXCH
+ ARC64_UNSPEC_SEXCH
+ ARC64_UNSPEC_DEXCH
+ ARC64_UNSPEC_HUNPKL
+ ARC64_UNSPEC_SUNPKL
+ ARC64_UNSPEC_DUNPKL
+ ARC64_UNSPEC_HUNPKM
+ ARC64_UNSPEC_SUNPKM
+ ARC64_UNSPEC_DUNPKM
+ ARC64_UNSPEC_HPACKL
+ ARC64_UNSPEC_SPACKL
+ ARC64_UNSPEC_DPACKL
+ ARC64_UNSPEC_HPACKM
+ ARC64_UNSPEC_SPACKM
+ ARC64_UNSPEC_DPACKM
+ ARC64_UNSPEC_HBFLYL
+ ARC64_UNSPEC_SBFLYL
+ ARC64_UNSPEC_DBFLYL
+ ARC64_UNSPEC_HBFLYM
+ ARC64_UNSPEC_SBFLYM
+ ARC64_UNSPEC_DBFLYM
+ ARC64_UNSPEC_VFADDSUB
+ ARC64_UNSPEC_VFSUBADD
+ ARC64_UNSPEC_VADDSUB
+ ARC64_UNSPEC_VSUBADD
+ ])
+
+(include "constraints.md")
+(include "predicates.md")
+
+;; -------------------------------------------------------------------
+;; Mode Iterators
+;; -------------------------------------------------------------------
+
+;; Iterator for General Purpose Integer registers (32- and 64-bit modes)
+(define_mode_iterator GPI [SI (DI "TARGET_64BIT")])
+
+;; For doubling width of an integer mode
+(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
+;; Iterator for QI and HI modes
+(define_mode_iterator SHORT [QI HI])
+
+;; Iterator for QI HI and SI modes
+(define_mode_iterator EXT [QI HI SI])
+
+;; Iterator for all integer modes (up to 64-bit)
+(define_mode_iterator ALLI [QI HI SI (DI "TARGET_64BIT")])
+(define_mode_iterator MV_ALLI [QI HI SI (DI "TARGET_64BIT || TARGET_LL64")])
+
+;; Iterator for HI SI and DI modes
+(define_mode_iterator EPI [HI SI (DI "TARGET_64BIT")])
+
+;; Iterator for HI and SI modes
+(define_mode_iterator HI_SI [HI SI])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; Iterator for integer modes which map into a pair of registers.
+(define_mode_iterator DBLI [DI (TI "TARGET_64BIT")])
+
+;; Iterator for General Purpose Floating-point registers (16-, 32-
+;; and 64-bit modes)
+(define_mode_iterator GPF_HF [(HF "ARC64_HAS_FPUH")
+ (SF "ARC64_HAS_FPUS") (DF "ARC64_HAS_FPUD")])
+
+;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
+(define_mode_iterator GPF [(SF "ARC64_HAS_FPUS") (DF "ARC64_HAS_FPUD")])
+
+;; Iterator for General Purpose Floating-point registers (16- and 32-bit modes)
+(define_mode_iterator HF_SF [(HF "ARC64_HAS_FPUH") (SF "ARC64_HAS_FPUS")])
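+
+;; For instance, a pattern written with the GPF_HF iterator expands
+;; into HFmode, SFmode and DFmode variants, each enabled only when the
+;; corresponding ARC64_HAS_FPUH/FPUS/FPUD condition holds.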
+
+;; All int vectors
+(define_mode_iterator VALL [V2HI V4HI V2SI])
+
+;; All 64b int vectors
+(define_mode_iterator V64I [V4HI V2SI])
+
+;; All fp vectors
+(define_mode_iterator VALLF [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")
+ (V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")
+ (V2DF "ARC64_VFP_128")])
+
+;; All fp vectors up to 64-bit
+(define_mode_iterator VALLF_64 [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")])
+
+;; All 128-bit fp vectors
+(define_mode_iterator VALLF_128 [(V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")
+ (V2DF "ARC64_VFP_128")])
+
+;; All 2xfp Vectors
+(define_mode_iterator V2xF [(V2HF "ARC64_VFP_32") (V2SF "ARC64_VFP_64")
+ (V2DF "ARC64_VFP_128")])
+
+;; All 4xfp Vectors
+(define_mode_iterator V4xF [(V4HF "ARC64_VFP_64") (V4SF "ARC64_VFP_128")])
+
+;; All fp vectors that are two registers wide
+(define_mode_iterator W2xF [(V2DF "ARC64_VFP_128")])
+
+;; All HF and SF vectors
+(define_mode_iterator V1FRF [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64") (V2SF "ARC64_VFP_64")
+ (V8HF "ARC64_VFP_128") (V4SF "ARC64_VFP_128")])
+
+;; All HF vectors
+(define_mode_iterator VxHF [(V2HF "ARC64_VFP_32")
+ (V4HF "ARC64_VFP_64")
+ (V8HF "ARC64_VFP_128")])
+
+;; -------------------------------------------------------------------
+;; Code Iterators
+;; -------------------------------------------------------------------
+
+;; Code iterator for sign/zero extension
+(define_code_iterator ANY_EXTEND [sign_extend zero_extend])
+
+;; This code iterator allows the shifts supported in arithmetic instructions
+(define_code_iterator ASHIFT [ashift ashiftrt lshiftrt])
+
+;; Only logical shifts
+(define_code_iterator LSHIFT [ashift lshiftrt])
+
+;; Iterates over the SETcc instructions
+(define_code_iterator SETCC [eq ne gt lt ge le ltu geu])
+(define_code_iterator ALLCC [eq ne gt lt ge le ltu geu gtu leu])
+
+;; Three operand arithmetic operations
+(define_code_iterator ARITH [plus minus mult])
+(define_code_iterator ADDSUB [plus minus] )
+
+;; Three operand logic operations
+(define_code_iterator LOGIC [and ior xor smin smax])
+
+;; Two operand logic operations
+(define_code_iterator NOT_ABS [not abs])
+
+;; Two operand logic operations extended, used for zero_extend
+;; patterns
+(define_code_iterator LOP2EX [not abs neg])
+
+;; Min/Max iterator
+(define_code_iterator MINMAX [smin smax])
+
+;; Three operand floating point arithmetic instructions
+(define_code_iterator DOPF [plus minus mult div smin smax])
+
+;; Vector operations
+(define_code_iterator VOPS [plus minus mult div])
+
+;; Commutative VF operations
+(define_code_iterator VCOP [plus mult])
+
+;; Emulated 1 operand vector operations
+(define_code_iterator ABS_NEG [abs neg])
+
+;; Code iterator for unary negate and bitwise complement.
+(define_code_iterator NEG_NOT [neg not])
+
+;; Code iterator for bit logic ops.
+(define_code_iterator BIT [ior xor])
+
+;; Code iterator for div/mod ops.
+(define_code_iterator DIVREM [div udiv mod umod])
+
+;; Commutative operations
+(define_code_iterator COMMUTATIVE [and ior xor])
+(define_code_iterator COMMUTATIVEF [plus and ior xor])
+
+;; -------------------------------------------------------------------
+;; Mode Attributes
+;; -------------------------------------------------------------------
+
+;; Map rtl mode to ARC mnemonic suffixes used in sign extend
+;; instructions.
+(define_mode_attr exttab [(QI "b") (HI "h") (SI "w")])
+
+;; Map rtl mode to ARC mnemonic suffixes
+(define_mode_attr sfxtab [(QI "b") (HI "h") (SI "") (DI "l")
+ (HF "h") (SF "s") (DF "d")
+ (V2HI "2h") (V4HI "4h") (V2SI "2")
+ (V2HF "h") (V4HF "h") (V2SF "s")
+ (V8HF "h") (V4SF "s") (V2DF "d")])
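+;; For example, a DImode operation gets the "l" suffix (ldl, stl, movl)
+;; while SImode uses the bare mnemonic.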
+
+;; Used by FPABS patterns.
+(define_mode_attr fptab [(SF "") (DF "l")])
+
+;; Same as above, but used by the conditional move patterns
+(define_mode_attr mcctab [(QI "") (HI "") (SI "") (DI "l")
+ (HF "") (SF "") (DF "l")
+ (V2HI "") (V4HI "l") (V2SI "l")
+ (V2HF "") (V4HF "l") (V2SF "l")])
+
+(define_mode_attr slfp [(HF "h") (SF "") (DF "l")
+ (V2HF "") (V4HF "l") (V2SF "l")])
+
+(define_mode_attr fmvftab [(HF "s") (SF "s") (DF "d")
+ (V2HF "s") (V4HF "d") (V2SF "d")])
+(define_mode_attr fmvitab [(HF "i") (SF "i") (DF "l")
+ (V2HF "i") (V4HF "l") (V2SF "l")])
+
+;; To be used by vector exch instructions emitted by reduction
+;; patterns.
+(define_mode_attr fmextab [(V4HF "s") (V4SF "d")])
+
+;; Used to implement cadd{90,270} functions
+(define_mode_attr cplxtab [(V2HF "H")
+ (V4HF "H")
+ (V2SF "S")
+ (V8HF "H")
+ (V4SF "S")
+ (V2DF "D")])
+
+;; Give the number of bits minus one in the mode
+(define_mode_attr sizen [(QI "7") (HI "15") (SI "31") (DI "63")
+ (HF "15") (SF "31") (DF "63")])
+
+;; Same as above but without the -1; used for fp loads/stores
+(define_mode_attr sizef [(HF "16") (SF "32") (DF "64")
+ (V2HF "32") (V4HF "64") (V2SF "64")
+ (V8HF "d64") (V4SF "d64") (V2DF "d64")])
+
+;; Used to implement predicated sign extension patterns
+(define_mode_attr sexsft [(QI "24") (HI "16") (SI "8")])
+
+;; Used by float conv patterns.
+(define_mode_attr f2tab [(SI "int") (DI "l")])
+
+;; Define element mode for each vector mode.
+(define_mode_attr VEL [(V2HI "HI") (V4HI "HI") (V2SI "SI")
+ (V2HF "HF") (V4HF "HF") (V2SF "SF")
+ (V8HF "HF") (V4SF "SF") (V2DF "DF")])
+(define_mode_attr vel [(V2HI "hi") (V4HI "hi") (V2SI "si")
+ (V2HF "hf") (V4HF "hf") (V2SF "sf")
+ (V8HF "hf") (V4SF "sf") (V2DF "df")])
+
+;; Define the element mode for each double-register mode.
+(define_mode_attr REL [(DI "SI") (TI "DI")])
+(define_mode_attr rel [(DI "si") (TI "di")])
+
+;; Used by vector extract pattern
+(define_mode_attr vextrsz [(V2HI "16") (V4HI "16") (V2SI "32")])
+(define_mode_attr vextrmsk [(V2HI "0x1f") (V4HI "0x3f") (V2SI "0x3f")])
+(define_mode_attr vextrsh [(V2HI "5") (V4HI "6") (V2SI "6")])
+
+;; -------------------------------------------------------------------
+;; Code Attributes
+;; -------------------------------------------------------------------
+;; Map rtl objects to optab names
+(define_code_attr optab [(ashift "ashl")
+ (ashiftrt "ashr")
+ (lshiftrt "lshr")
+ (rotatert "rotr")
+ (sign_extend "extend")
+ (zero_extend "zero_extend")
+ (sign_extract "extv")
+ (zero_extract "extzv")
+ (fix "fix")
+ (unsigned_fix "fixuns")
+ (float "float")
+ (unsigned_float "floatuns")
+ (popcount "popcount")
+ (and "and")
+ (ior "ior")
+ (xor "xor")
+ (not "one_cmpl")
+ (neg "neg")
+ (plus "add")
+ (minus "sub")
+ (mult "mul")
+ (div "div")
+ (udiv "udiv")
+ (mod "mod")
+ (umod "umod")
+ (ss_plus "qadd")
+ (us_plus "qadd")
+ (ss_minus "qsub")
+ (us_minus "qsub")
+ (ss_neg "qneg")
+ (ss_abs "qabs")
+ (smin "smin")
+ (smax "smax")
+ (umin "umin")
+ (umax "umax")
+ (eq "eq")
+ (ne "ne")
+ (lt "lt")
+ (ge "ge")
+ (le "le")
+ (gt "gt")
+ (ltu "ltu")
+ (leu "leu")
+ (geu "geu")
+ (gtu "gtu")
+ (abs "abs")
+ (sqrt "sqrt")])
+
+;; Map rtl to ARC's cc-mnemonic names; slightly different from above.
+(define_code_attr cctab [(eq "eq")
+ (ne "ne")
+ (lt "lt")
+ (ge "ge")
+ (le "le")
+ (gt "gt")
+ (ltu "lo")
+ (leu "NA")
+ (geu "hs")
+ (gtu "NA")])
+
+;; Used for inverting predicated SET instructions.
+(define_code_attr CCTAB [(eq "EQ")
+ (ne "NE")
+ (lt "LT")
+ (ge "GE")
+ (le "LE")
+ (gt "GT")
+ (ltu "LTU")
+ (leu "NA")
+ (geu "GEU")
+ (gtu "NA")])
+
+;; Sign- or zero-extend data-op
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; Optab prefix for sign/zero-extending operations
+(define_code_attr su_optab [(sign_extend "") (zero_extend "u")])
+
+;; Map rtl objects to arc instruction names
+(define_code_attr mntab [(abs "abs")
+ (not "not")
+ (neg "neg")
+ (ashift "asl")
+ (ashiftrt "asr")
+ (sign_extend "sex")
+ (zero_extend "ext")
+ (div "div")
+ (udiv "divu")
+ (mult "mul")
+ (mod "rem")
+ (umod "remu")
+ (lshiftrt "lsr")
+ (and "and")
+ (ior "or")
+ (xor "xor")
+ (plus "add")
+ (minus "sub")
+ (smax "max")
+ (smin "min")])
+
+;; Map rtl objects to arc's bit operation instructions
+(define_code_attr bit_optab [(ior "bset")
+ (xor "bxor")])
+
+;; -------------------------------------------------------------------
+;; Int Iterators.
+;; -------------------------------------------------------------------
+(define_int_iterator PERMUTED [ARC64_UNSPEC_DUNPKL
+ ARC64_UNSPEC_DUNPKM
+ ARC64_UNSPEC_DPACKL
+ ARC64_UNSPEC_DPACKM
+ ARC64_UNSPEC_DBFLYL
+ ARC64_UNSPEC_DBFLYM])
+(define_int_iterator PERMUTES [ARC64_UNSPEC_SUNPKL
+ ARC64_UNSPEC_SUNPKM
+ ARC64_UNSPEC_SPACKL
+ ARC64_UNSPEC_SPACKM
+ ARC64_UNSPEC_SBFLYL
+ ARC64_UNSPEC_SBFLYM])
+(define_int_iterator PERMUTEH [ARC64_UNSPEC_HUNPKL
+ ARC64_UNSPEC_HUNPKM
+ ARC64_UNSPEC_HPACKL
+ ARC64_UNSPEC_HPACKM
+ ARC64_UNSPEC_HBFLYL
+ ARC64_UNSPEC_HBFLYM])
+
+;; -------------------------------------------------------------------
+;; Int Iterators Attributes.
+;; -------------------------------------------------------------------
+(define_int_attr perm_pat [(ARC64_UNSPEC_HUNPKL "unpkl")
+ (ARC64_UNSPEC_SUNPKL "unpkl")
+ (ARC64_UNSPEC_DUNPKL "unpkl")
+ (ARC64_UNSPEC_HUNPKM "unpkm")
+ (ARC64_UNSPEC_SUNPKM "unpkm")
+ (ARC64_UNSPEC_DUNPKM "unpkm")
+ (ARC64_UNSPEC_HPACKL "packl")
+ (ARC64_UNSPEC_SPACKL "packl")
+ (ARC64_UNSPEC_DPACKL "packl")
+ (ARC64_UNSPEC_HPACKM "packm")
+ (ARC64_UNSPEC_SPACKM "packm")
+ (ARC64_UNSPEC_DPACKM "packm")
+ (ARC64_UNSPEC_HBFLYL "bflyl")
+ (ARC64_UNSPEC_SBFLYL "bflyl")
+ (ARC64_UNSPEC_DBFLYL "bflyl")
+ (ARC64_UNSPEC_HBFLYM "bflym")
+ (ARC64_UNSPEC_SBFLYM "bflym")
+ (ARC64_UNSPEC_DBFLYM "bflym")])
+
+;; -------------------------------------------------------------------
+;; Instruction types and attributes
+;; -------------------------------------------------------------------
+
+;; What is the insn_cost for this insn?  The target hook can still
+;; override this.  When optimizing for size, the "length" attribute is
+;; used instead.
+(define_attr "cost" "" (const_int 0))
+
+(define_attr "type" "abs, adc, adcl, add, addhl, addl, and, andl, asl,
+asll, asr, asrl, atldlop, atldop, bbit, bclr, bi, bic, bl, block,
+bmsk, branch, branchcc, brcc, brk, bset, bsetl, btst, bxor, bxorl,
+cmp, dbnz, div, divl, dmb, dmpywh, ex, ext, fadd, fcmp, fd2s, fdiv,
+ffs, fh2s, flag, fls, fmadd, fmax, fmin, fmov, fmsub, fmul, fnmadd,
+fnmsub, fp2int, fp2uint, frnd, fs2d, fs2h, fsgnj, fsgnjn, fsgnjx,
+fsqrt, fsub, int2fp, jl, jump, ld, llock, lr, lsr, lsrl, mac, max,
+maxl, min, minl, mod, modl, move, movecc, mpy, mpyl, neg, nop, norm,
+normh, norml, not, notl, or, orl, qmach, qmpyh, return, rol, ror,
+rtie, sbc, sbcl, scond, setcc, sex, sr, st, sub, subl, swap, swape,
+swapel, swapl, sync, trap, tst, udiv, udivl, uint2fp, umod, umodl,
+unknown, vadd, vaddsub, vfadd, vfaddsub, vfbflyl, vfbflym, vfdiv,
+vfexch, vfext, vfins, vfmul, vfpackl, vfpackm, vfrep, vfsub, vfsubadd,
+vfunpkl, vfunpkm, vmac2h, vmpy2h, vpack, vsub, vsubadd, xbfu, xor,
+xorl"
+ (const_string "unknown"))
+
+(define_attr "iscompact" "yes,no,maybe" (const_string "no"))
+
+(define_attr "predicable" "yes,no" (const_string "no"))
+
+(define_attr "length" ""
+ (cond
+ [(eq_attr "iscompact" "yes")
+ (const_int 2)
+
+ (eq_attr "type" "ld")
+ (if_then_else
+ (match_operand 1 "limm_ldst_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "st")
+ (if_then_else
+ (ior (match_operand 0 "limm_ldst_operand" "")
+ (and (not (match_operand 1 "S06S0_immediate_operand" ""))
+ (match_operand 1 "immediate_operand" "")))
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "bl")
+ (if_then_else
+ (ior (match_operand 0 "plt34_symbol_p" "")
+ (match_operand 1 "plt34_symbol_p" ""))
+ (const_int 6) (const_int 4))
+
+ (eq_attr "iscompact" "maybe")
+ (cond
+ [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
+ (const_int 4)
+
+ (eq_attr "type" "and")
+ (const_int 2)
+
+ (eq_attr "type" "or")
+ (const_int 2)
+
+ (match_operand:DI 0 "" "")
+ (const_int 4)
+ ]
+ (const_int 2))
+ ]
+ (const_int 8)))
+
+;; Select various CPU features.
+(define_attr "cpu_facility" "std,cd,ncd"
+ (const_string "std"))
+
+(define_attr "enabled" "no,yes"
+ (cond [(and (eq_attr "cpu_facility" "cd")
+ (not (match_test ("TARGET_CODE_DENSITY"))))
+ (const_string "no")
+ (and (eq_attr "cpu_facility" "ncd")
+ (match_test ("TARGET_CODE_DENSITY")))
+ (const_string "no")
+ ]
+ (const_string "yes")))
+
+;; -------------------------------------------------------------------
+;; Delay slots
+;; -------------------------------------------------------------------
+
+;; Define what can go in a delay slot, generic.
+(define_attr "slottable" "false,true"
+ (cond
+ [(eq_attr "type" "jump,branch,jl,bl,bi,branchcc,dbnz,return,bbit,brcc")
+ (const_string "false")
+
+ (eq_attr "length" "2,4")
+ (const_string "true")
+ ]
+ (const_string "false")))
+
+;; Define what can go in a call delay slot.
+(define_attr "call_slottable" "false,true"
+ (cond
+ [(eq_attr "slottable" "false")
+ (const_string "false")
+
+ (match_test "regno_clobbered_p (BLINK_REGNUM, insn, Pmode, 1)")
+ (const_string "false")
+ ]
+ (const_string "true")))
+
+;; Calls delay slots
+(define_delay (and (eq_attr "type" "jl,bl,return")
+ (eq_attr "length" "2,4,8"))
+ [(eq_attr "call_slottable" "true") (nil) (nil)])
+
+;; Jumps delay slots
+(define_delay (ior (eq_attr "type" "jump,branch,branchcc,dbnz,bbit")
+;; According to the PRM, jumps with LIMM and delay slots are illegal.
+ (and (eq_attr "type" "brcc")
+ (eq_attr "length" "4,12")))
+ [(eq_attr "slottable" "true") (nil) (nil)])
+
+;; Is there an instruction that we are actually putting into the delay
+;; slot?  N.B. Until after the delay-slot filler has run, consider the
+;; full insn size.  This is required for computing a correct loop body
+;; size.
+(define_attr "delay_slot_filled" "no,yes"
+ (cond [(match_test "!crtl->dbr_scheduled_p")
+ (const_string "yes")
+ (match_test "NEXT_INSN (PREV_INSN (insn)) == insn")
+ (const_string "no")
+ (match_test "JUMP_P (insn)
+ && INSN_ANNULLED_BRANCH_P (insn)
+ && !INSN_FROM_TARGET_P (NEXT_INSN (insn))")
+ (const_string "no")]
+ (const_string "yes")))
+
+(define_attr "delay_slot_length" ""
+ (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn")
+ (const_int 0)]
+ (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn)))
+ - get_attr_length (insn)")))
+
+;; -------------------------------------------------------------------
+;; Pipeline descriptions and scheduling
+;; -------------------------------------------------------------------
+
+(include "hs6x.md")
+
+;; -------------------------------------------------------------------
+;; Moves
+;; -------------------------------------------------------------------
+
+(define_expand "mov"
+ [(set (match_operand:MV_ALLI 0 "nonimmediate_operand")
+ (match_operand:MV_ALLI 1 "general_operand"))]
+ ""
+ "
+ if (arc64_prepare_move_operands (operands[0], operands[1], <MODE>mode))
+ DONE;
+ "
+ )
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand")
+ (match_operand:TI 1 "general_operand"))]
+ "TARGET_WIDE_LDST"
+ {
+ if (CONSTANT_P (operands[1]))
+ {
+ emit_move_insn (gen_lowpart (DImode, operands[0]),
+ gen_lowpart (DImode, operands[1]));
+ emit_move_insn (gen_highpart (DImode, operands[0]),
+ gen_highpart_mode (DImode, TImode, operands[1]));
+ DONE;
+ }
+ else if (!register_operand (operands[0], TImode)
+ && !register_operand (operands[1], TImode))
+ operands[1] = force_reg (TImode, operands[1]);
+ arc64_prepare_move_operands (operands[0], operands[1], TImode);
+ DONE;
+
+ })
+
+;; We use movsf for both soft and hard float.
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand"))]
+ ""
+ {
+ if (arc64_prepare_move_operands (operands[0], operands[1], SFmode))
+ DONE;
+ })
+
+(define_expand "movhf"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "")
+ (match_operand:HF 1 "general_operand"))]
+ "ARC64_HAS_FPUH"
+ {
+ if (arc64_prepare_move_operands (operands[0], operands[1], HFmode))
+ DONE;
+ })
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand"))]
+ "ARC64_HAS_FPUD"
+ {
+ if (arc64_prepare_move_operands (operands[0], operands[1], DFmode))
+ DONE;
+ })
+
+;; mov<.f> b, c
+;; mov<.f> b, s12
+;; mov_s b, u8
+;; mov_s g, h
+;; mov_s h, s3
+;;
+;; ld a, [b, s9]
+;; ld a, [b, c]
+;; ld a, [limm ]
+;;
+;; ldb_s a, [b, c]
+;; ldb_s c, [b, u5]
+;;
+;; st c , [b , s9]
+;; st limm, [b , s9]
+;; stb_s b , [sp, u7]
+;; stb_s c , [b , u5]
+(define_insn "*arc64_movqi"
+ [(set
+ (match_operand:QI 0 "arc64_dest_operand" "=qh, q, r, q,Ustms,Ustor,Ucnst, r,Ustor")
+ (match_operand:QI 1 "general_operand" " qhS03MV,U08S0,ri,Uldms, q,S06S0, i, m, r"))
+ ]
+ ; in general, at least one of the operands must be a register
+ "register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode)
+ /* this is to match 'stb w6, [limm]' (S06S0 is the w6). */
+ || (satisfies_constraint_S06S0 (operands[1])
+ && memory_operand (operands[0], QImode))
+ /* writing a byte into memory using limm variant. */
+ || (immediate_operand (operands[1], QImode)
+ && memory_operand (operands[0], QImode))"
+ "@
+ mov_s\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ldb_s\\t%0,%1
+ stb_s\\t%1,%0
+ stb%U0\\t%1,%0
+ stb%U0\\t%1,%0
+ ldb%U1\\t%0,%1
+ stb%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,ld,st,st,st,ld,st")
+ (set_attr "length" "2,2,4,2,2,*,8,*,*")]
+)
+
+(define_insn "*arc64_movhi"
+ [(set
+ (match_operand:HI 0 "arc64_dest_operand" "=qh,r, q, r,h,r, q,Ustms,Ustw6,Ucnst, r,Ustor")
+ (match_operand:HI 1 "general_operand" "qhS03MV,r,U08S0,S12S0,i,i,Uldms, q,S06S0, i, m, r"))
+ ]
+ "register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode)
+ || (satisfies_constraint_S06S0 (operands[1])
+ && memory_operand (operands[0], HImode))
+ || (CONST_INT_P (operands[1])
+ && satisfies_constraint_Ucnst (operands[0]))"
+ "@
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ldh_s\\t%0,%1
+ sth_s\\t%1,%0
+ sth%U0\\t%1,%0
+ sth%U0\\t%1,%0
+ ldh%U1\\t%0,%1
+ sth%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,move,move,ld,st,st,st,ld,st")
+ (set_attr "length" "2,4,2,4,6,8,2,2,*,8,*,*")]
+)
+
+(define_insn "*arc64_movsi"
+ [(set
+ (match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst, r,Ustor")
+ (match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, m, r"))
+ ]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)
+ || (satisfies_constraint_S06S0 (operands[1])
+ && memory_operand (operands[0], SImode))
+ || (CONST_INT_P (operands[1])
+ && satisfies_constraint_Ucnst (operands[0]))"
+ "@
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ add\\t%0,pcl,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ld_s\\t%0,%1
+ st_s\\t%1,%0
+ st%U0\\t%1,%0
+ st%U0\\t%1,%0
+ ld%U1\\t%0,%1
+ st%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,st")
+ (set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,*,*")]
+)
+
+(define_insn "*mov_cmp0"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (match_operand:ALLI 1 "nonmemory_operand" "S12S0r,S32S0")
+ (const_int 0)))
+ (set (match_operand:ALLI 0 "register_operand" "=r,r") (match_dup 1))]
+ ""
+ "mov.f\\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,8")])
+
+;; Soft-float SFmode move (core registers only).
+(define_insn "*movsf_softfp"
+ [(set (match_operand:SF 0 "arc64_dest_operand" "=qh,r,qh,r, q,Ustms,r,Ustor")
+ (match_operand:SF 1 "general_operand" "qhZ,r, E,E,Uldms, q,m,r"))
+ ]
+ "!ARC64_HAS_FP_BASE
+ && (register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+ "@
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ mov_s\\t%0,%1
+ mov\\t%0,%1
+ ld_s\\t%0,%1
+ st_s\\t%1,%0
+ ld%U1\\t%0,%1
+ st%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,ld,st,ld,st")
+ (set_attr "length" "2,4,6,8,2,2,*,*")])
+
+;; For an fp move we use the FSMOV instruction.  However, we can also
+;; use FSSGNJ.
+;; FIXME! add short instruction selection
+(define_insn "*mov_hardfp"
+ [(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor")
+ (match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))]
+ "ARC64_HAS_FP_BASE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ "@
+ fmov\\t%0,%1
+ fld%U1\\t%0,%1
+ fst%U0\\t%1,%0
+ fmv2\\t%0,%1
+ fmv2\\t%0,%1
+ mov\\t%0,%1
+ mov\\t%0,%1
+ ld%U1\\t%0,%1
+ st%U0\\t%1,%0"
+ [(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st")
+ (set_attr "length" "4,*,*,4,4,4,8,*,*")])
+
+;; 128-bit moves.
+(define_insn_and_split "*arc64_movti"
+ [(set (match_operand:TI 0 "arc64_dest_operand" "=r,r,Ustor")
+ (match_operand:TI 1 "nonimmediate_operand" "r,m,r"))]
+ "TARGET_WIDE_LDST
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+ "@
+ #
+ lddl%U1\\t%0,%1
+ stdl%U0\\t%1,%0"
+ "&& reload_completed
+ && arc64_split_double_move_p (operands, TImode)"
+ [(const_int 0)]
+ {
+ arc64_split_double_move (operands, TImode);
+ DONE;
+ }
+ [(set_attr "type" "move,ld,st")
+ (set_attr "length" "8,*,*")])
+;;
+;; Short insns: movl_s g,h; movl_s b,u8
+;; Long insns: movl, stl, ldl
+;;
+(define_insn "*arc64_movdi"
+ [(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q,r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
+ (match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,r,S12S0,S32S0SymMV,SyPic,S32S0,Ustk>,m, r, r"))]
+ "TARGET_64BIT
+ && (register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode)
+ || (CONST_INT_P (operands[1])
+ && satisfies_constraint_Ucnst (operands[0])))"
+ "@
+ movl_s\\t%0,%1
+ movl_s\\t%0,%1
+ movl\\t%0,%1
+ movl\\t%0,%1
+ movl\\t%0,%1
+ addl\\t%0,pcl,%1
+ stl%U0\\t%1,%0
+ popl_s\\t%0
+ ldl%U1\\t%0,%1
+ pushl_s\\t%1
+ stl%U0\\t%1,%0"
+ [(set_attr "type" "move,move,move,move,move,addl,st,ld,ld,st,st")
+ (set_attr "length" "2,2,4,4,8,8,8,2,*,2,*")]
+)
+
+;; Hi/Low moves for constant and symbol loading.
+
+(define_insn "*movdi_high"
+ [(set (match_operand:DI 0 "register_operand" "= r, qh, r,r")
+ (high:DI
+ (match_operand:DI 1 "arc64_immediate_or_pic" "S12S0,SymIm,SymIm,SyPic")))]
+ ""
+ "@
+ movhl\\t%0,%H1
+ movhl_s\\t%0,%H1
+ movhl\\t%0,%H1
+ addhl\\t%0,pcl,%H1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,6,8,8")])
+
+;; The immediates are already trimmed to fit the 32-bit limm field.
+(define_insn "*movh_shift"
+ [(set (match_operand:DI 0 "register_operand" "= r, qh, r")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "rS12S0,S32S0,S32S0")
+ (const_int 32)))]
+ ""
+ "@
+ movhl\\t%0,%1
+ movhl_s\\t%0,%1
+ movhl\\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,6,8")])
+
+;; N.B. All immediates need to be unsigned so that they end up in at most u32.
+(define_insn "*movdi_lo_sum_iori"
+ [(set (match_operand:DI 0 "register_operand" "=q, r, h, r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "0, 0, 0, r")
+ (match_operand:DI 2 "immediate_operand" "q,U10S0,SymIm,SymIm")))]
+ ""
+ "@
+ orl%?\\t%0,%1,%2
+ orl%?\\t%0,%1,%L2
+ orl%?\\t%0,%1,%L2
+ orl%?\\t%0,%1,%L2"
+ [(set_attr "type" "or")
+ (set_attr "iscompact" "yes,no,yes,no")
+ (set_attr "length" "2,4,6,8")])
+
+(define_insn "*adddi_high"
+ [(set (match_operand:DI 0 "register_operand" "= qh, r, r,r, r")
+ (plus:DI (match_operand:DI 1 "register_operand" " 0, 0, r,r, r")
+ (high:DI
+ (match_operand:DI 2 "nonmemory_operand" "S32S0,S12S0,U06S0,r,S32S0"))))]
+ ""
+ "@
+ addhl_s\\t%0,%1,%2
+ addhl\\t%0,%1,%2
+ addhl\\t%0,%1,%2
+ addhl\\t%0,%1,%2
+ addhl\\t%0,%1,%2"
+ [(set_attr "type" "addhl")
+ (set_attr "iscompact" "yes,no,no,no,no")
+ (set_attr "length" "6,4,4,4,8")])
+
+; conditional execution patterns
+(define_insn "*mov_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (set (match_operand:ALLI 0 "register_operand" "= r,r")
+ (match_operand:ALLI 1 "nonmemory_operand" "rU06S0,S32S0")))]
+ ""
+ "mov.%m3\\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "length" "4,8")])
+
+(define_insn "*mov_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (set (match_operand:GPF_HF 0 "register_operand" "=w,*r,*r")
+ (match_operand:GPF_HF 1 "nonmemory_operand" "w,*r,*E")))]
+ ""
+ "@
+ fmov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1"
+ [(set_attr "type" "fmov,move,move")
+ (set_attr "length" "4,4,8")])
+
+;; 0 is dst
+;; 1 is src
+;; 2 is size of copy in bytes
+;; 3 is alignment
+
+(define_expand "cpymem"
+ [(match_operand:BLK 0 "memory_operand")
+ (match_operand:BLK 1 "memory_operand")
+ (match_operand:P 2 "immediate_operand")
+ (match_operand:P 3 "immediate_operand")]
+ "!STRICT_ALIGNMENT"
+{
+ if (arc64_expand_cpymem (operands))
+ DONE;
+ FAIL;
+}
+)
+
+;; -------------------------------------------------------------------
+;; Subroutine calls and sibcalls
+;; -------------------------------------------------------------------
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "memory_operand")
+ (match_operand 1 "general_operand"))
+ (use (match_operand 2 "" ""))
+ (clobber (reg BLINK_REGNUM))])]
+ ""
+ {
+ arc64_expand_call (NULL_RTX, operands[0], false);
+ DONE;
+ }
+)
+
+(define_insn "*call_insn"
+ [(call (mem:P (match_operand:P 0 "arc64_call_insn_operand" "q,r,BLsym,S12S0,S32S0"))
+ (match_operand 1 "" ""))
+ (clobber (reg:P BLINK_REGNUM))]
+ ""
+ "@
+ jl_s%*\\t[%0]
+ jl%*\\t[%0]
+ bl%P0%*\\t%C0
+ jl%*\\t%0
+ jl%*\\t%0"
+ [(set_attr "type" "jl,jl,bl,jl,jl")
+ (set_attr "length" "2,4,*,4,8")])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand")
+ (match_operand 2 "general_operand")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg BLINK_REGNUM))])]
+ ""
+ "
+ {
+ arc64_expand_call (operands[0], operands[1], false);
+ DONE;
+ }"
+)
+
+(define_insn "*call_value_insn"
+ [(set (match_operand 0 "" "")
+ (call (mem:P (match_operand:P 1 "arc64_call_insn_operand"
+ "q,r,BLsym,S12S0,S32S0"))
+ (match_operand 2 "" "")))
+ (clobber (reg:P BLINK_REGNUM))]
+ ""
+ "@
+ jl_s%*\\t[%1]
+ jl%*\\t[%1]
+ bl%P1%*\\t%C1
+ jl%*\\t%1
+ jl%*\\t%1"
+ [(set_attr "type" "jl,jl,bl,jl,jl")
+ (set_attr "length" "2,4,*,4,8")])
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "memory_operand")
+ (match_operand 1 "general_operand"))
+ (return)
+ (use (match_operand 2 "" ""))])]
+ ""
+ {
+ arc64_expand_call (NULL_RTX, operands[0], true);
+ DONE;
+ }
+ )
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand")
+ (match_operand 2 "general_operand")))
+ (return)
+ (use (match_operand 3 "" ""))])]
+ ""
+ {
+ arc64_expand_call (operands[0], operands[1], true);
+ DONE;
+ }
+)
+
+;FIXME! add short variant for jump
+(define_insn "*sibcall_insn"
+ [(call
+ (mem:P
+ (match_operand:P 0 "arc64_call_insn_operand" "Sbreg,BLsym,S12S0,S32S0"))
+ (match_operand 1 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "@
+ j%*\\t[%0]
+ b%*\\t%C0
+ j%*\\t%0
+ j%*\\t%0"
+ [(set_attr "type" "jump,branch,jump,jump")
+ (set_attr "length" "4,4,4,8")]
+)
+
+;FIXME! add short variant for jump
+(define_insn "*sibcall_value_insn"
+ [(set (match_operand 0 "" "")
+ (call
+ (mem:P
+ (match_operand:P 1 "arc64_call_insn_operand" "Sbreg,BLsym,S12S0,S32S0"))
+ (match_operand 2 "" "")))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "@
+ j%*\\t[%1]
+ b%*\\t%C1
+ j%*\\t%1
+ j%*\\t%1"
+ [(set_attr "type" "jump,branch,jump,jump")
+ (set_attr "length" "4,4,4,8")]
+)
+
+; conditional execution patterns
+(define_insn "*call_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (parallel
+ [(call (mem:P
+ (match_operand:P 0 "arc64_call_insn_operand" "r,BLsym,U06S0"))
+ (match_operand 1 "" ""))
+ (clobber (reg:P BLINK_REGNUM))]))]
+ "(arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[0], Pmode)"
+ "@
+ jl%m3%*\\t[%0]
+ bl%m3%*\\t%C0
+ jl%m3%*\\t%0"
+ [(set_attr "type" "jl,bl,jl")
+ (set_attr "length" "4")])
+
+(define_insn "*callv_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:P (match_operand:P 1 "arc64_call_insn_operand"
+ "r,BLsym,U06S0"))
+ (match_operand 2 "" "")))
+ (clobber (reg:P BLINK_REGNUM))]))]
+ "(arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[1], Pmode)"
+ "@
+ jl%m3%*\\t[%1]
+ bl%m3%*\\t%C1
+ jl%m3%*\\t%1"
+ [(set_attr "type" "jl,bl,jl")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_insn_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (parallel
+ [(call (mem:P
+ (match_operand:P 0 "arc64_call_insn_operand" "Sbreg,BLsym,U06S0"))
+ (match_operand 1 "" ""))
+ (return)]))]
+ "SIBLING_CALL_P (insn)
+ && ((arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[0], Pmode))"
+ "@
+ j%m3%*\\t[%0]
+ b%m3%*\\t%C0
+ j%m3%*\\t%0"
+ [(set_attr "type" "jump,branch,jump")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_insn_ce"
+ [(cond_exec
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (parallel
+ [(set (match_operand 0 "" "")
+ (call
+ (mem:P
+ (match_operand:P 1 "arc64_call_insn_operand" "Sbreg,BLsym,U06S0"))
+ (match_operand 2 "" "")))
+ (return)]))]
+ "SIBLING_CALL_P (insn)
+ && ((arc64_cmodel_var == ARC64_CMODEL_SMALL)
+ || register_operand (operands[1], Pmode))"
+ "@
+ j%m3%*\\t[%1]
+ b%m3%*\\t%C1
+ j%m3%*\\t%1"
+ [(set_attr "type" "jump,branch,jump")
+ (set_attr "length" "4")])
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "")
+ (const_int 0))
+ (match_operand 1 "")
+ (match_operand 2 "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ emit_insn (gen_blockage ());
+ DONE;
+})
+
+;; -------------------------------------------------------------------
+;; Jumps and other miscellaneous insns
+;; -------------------------------------------------------------------
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand"))]
+ ""
+{
+ operands[0] = force_reg (Pmode, operands[0]);
+ if (Pmode == SImode)
+ emit_jump_insn (gen_indirect_jumpsi (operands[0]));
+ else
+ emit_jump_insn (gen_indirect_jumpdi (operands[0]));
+ DONE;
+})
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:P 0 "register_operand" "q,r"))]
+ ""
+ "j%?%*\\t[%0]"
+ [(set_attr "type" "jump")
+ (set_attr "length" "2,4")]
+)
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "b%?%*\\t%l0"
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -512))
+ (le (minus (match_dup 0) (pc)) (const_int 506))
+ (match_test "!CROSSING_JUMP_P (insn)")
+ (eq_attr "delay_slot_filled" "no"))
+ (const_int 2)
+ (const_int 4)))]
+)
+
+(define_expand "cbranch4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arc64_comparison_operator"
+ [(match_operand:GPI 1 "nonmemory_operand")
+ (match_operand:GPI 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+ operands[1] = arc64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[2] = const0_rtx;
+ "
+ )
+
+(define_expand "cbranch4"
+ [(set (pc) (if_then_else (match_operator 0 "arc64_comparison_operator"
+ [(match_operand:GPF_HF 1 "register_operand")
+ (match_operand:GPF_HF 2 "register_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "ARC64_HAS_FP_BASE"
+ "
+ operands[1] = arc64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[2] = const0_rtx;
+ "
+)
+
+(define_expand "cbranchcc4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arc64_comparison_operator"
+ [(match_operand 1 "cc_register")
+ (match_operand 2 "const0_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_insn "condjump"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arc64_comparison_operator"
+ [(match_operand 1 "cc_register" "")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "b%m0%?%*\\t%l2"
+ [(set_attr "type" "branchcc")
+ (set (attr "length")
+ (cond
+ [(eq_attr "delay_slot_filled" "yes")
+ (const_int 4)
+
+ (and (match_operand 0 "equality_comparison_operator" "")
+ (and (ge (minus (match_dup 2) (pc)) (const_int -512))
+ (le (minus (match_dup 2) (pc)) (const_int 506))))
+ (const_int 2)
+
+ (and (match_operand 0 "ccmode_comparison_operator" "")
+ (and (ge (minus (match_dup 2) (pc)) (const_int -60))
+ (le (minus (match_dup 2) (pc)) (const_int 58))))
+ (const_int 2)]
+ (const_int 4)))])
+
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ ""
+ "
+ arc64_expand_prologue ();
+ DONE;
+ "
+)
+
+(define_expand "epilogue"
+ [(clobber (const_int 0))]
+ ""
+ "
+ arc64_expand_epilogue (false);
+ DONE;
+ "
+)
+
+(define_expand "sibcall_epilogue"
+ [(clobber (const_int 0))]
+ ""
+ "
+ arc64_expand_epilogue (true);
+ DONE;
+ "
+)
+
+(define_expand "return"
+ [(simple_return)]
+ "arc64_can_use_return_insn_p ()"
+ "")
+
+(define_insn "simple_return"
+ [(simple_return)]
+ ""
+ {
+ return arc64_output_return ();
+ }
+ [(set_attr "type" "return")
+ (set_attr "length" "2")])
+
+(define_insn "trap_s"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "U06S0")]
+ ARC64_VUNSPEC_TRAP_S)]
+ ""
+ "trap_s\\t%0"
+ [(set_attr "length" "2")
+ (set_attr "type" "trap")])
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "trap_s\\t5"
+ [(set_attr "length" "2")
+ (set_attr "type" "trap")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop_s"
+ [(set_attr "type" "nop")
+ (set_attr "length" "2")])
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] ARC64_VUNSPEC_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "block")]
+ )
+
+(define_insn "rtie"
+ [(return)
+ (unspec_volatile [(const_int 0)] ARC64_VUNSPEC_RTIE)]
+ ""
+ "rtie"
+ [(set_attr "length" "4")
+ (set_attr "type" "rtie")]
+ )
+
+;; Don't need initialization instructions.
+(define_expand "doloop_begin"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" ""))] ; doloop_end pattern
+ ""
+ {
+ FAIL;
+ }
+)
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" ""))] ; label at the top of the loop
+ ""
+ {
+ machine_mode mode = GET_MODE (operands[0]);
+ if (mode != Pmode)
+ FAIL;
+
+ operands[0] = force_reg (Pmode, operands[0]);
+
+ if (mode == SImode)
+ emit_jump_insn (gen_dbnzsi (operands[0], operands[1]));
+ else
+ emit_jump_insn (gen_dbnzdi (operands[0], operands[1]));
+ DONE;
+ })
+
+(define_insn_and_split "dbnz<mode>"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:P 0 "arc64_dest_operand" "+r,!Ustor")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:P (match_dup 0)
+ (const_int -1)))
+ (clobber (match_scratch:P 2 "=X,r"))]
+ ""
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ return \"#\";
+
+ case 0:
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ /* This is the normal case. */
+ return \"dbnz%*\\t%0,%l1\";
+
+ case 8:
+ /* The dbnz is too short, use sub.f/bne instructions. */
+ return \"sub.f\\t%0,%0,1\\n\\tbne%*\\t%l1\";
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+}"
+ "reload_completed && memory_operand (operands[0], Pmode)"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (plus:P (match_dup 2) (const_int -1))
+ (const_int 0)))
+ (set (match_dup 2) (plus:P (match_dup 2) (const_int -1)))])
+ (set (match_dup 0) (match_dup 2))
+ (set (pc) (if_then_else (ne (reg:CC_ZN CC_REGNUM)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ ""
+ [(set_attr "type" "dbnz")
+ (set (attr "length")
+ (cond [(eq_attr "alternative" "1")
+ (const_int 20)
+ (and (eq_attr "alternative" "0")
+ (ge (minus (match_dup 1) (pc)) (const_int -4092))
+ (le (minus (match_dup 1) (pc))
+ (minus (const_int 4094)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)]
+ (const_int 8)))])
+
+; conditional execution
+(define_insn "*returnt_ce"
+ [(set (pc)
+ (if_then_else (match_operator 0 "arc64_comparison_operator"
+ [(reg CC_REGNUM) (const_int 0)])
+ (simple_return) (pc)))]
+ ""
+ "j%m0%*\\t[blink]"
+ [(set_attr "type" "return")
+ (set_attr "length" "4")])
+
+; Jump tables
+(define_expand "casesi"
+ [(match_operand:SI 0 "register_operand" "") ; Index
+ (match_operand:SI 1 "const_int_operand" "") ; Lower bound
+ (match_operand:SI 2 "const_int_operand" "") ; Total range
+ (match_operand 3 "" "") ; Table label
+ (match_operand 4 "" "")] ; Out of range label
+ ""
+ {
+ arc64_expand_casesi (operands);
+ DONE;
+ })
+
+(define_insn "casesi_dispatch"
+ [(set (pc)
+ (unspec:DI [(match_operand:SI 0 "register_operand" "r,q,r")
+ (label_ref (match_operand 1 "" ""))
+ (const_int 0)]
+ ARC64_UNSPEC_CASESI))]
+ ""
+ "@
+ bi\\t[%0]
+ j_s%*\\t[%0]
+ j%*\\t[%0]"
+ [(set_attr "type" "bi,jump,jump")
+ (set_attr "length" "4,2,4")
+ (set_attr "cpu_facility" "cd,ncd,ncd")])
+
+(define_insn "casesi_addaddr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (label_ref (match_operand 2 "" ""))
+ (const_int 1)]
+ ARC64_UNSPEC_CASESI))]
+ ""
+ "add2\\t%0,%l2,%1"
+ [(set_attr "type" "add")
+ (set_attr "length" "8")])
+
+(define_insn "casesi_addaddrdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (const_int 2)]
+ ARC64_UNSPEC_CASESI))]
+ ""
+ "add2l\\t%0,%2,%1"
+ [(set_attr "type" "addl")
+ (set_attr "length" "4")])
+
+(define_insn "casesi_dispatchdi"
+ [(set (pc) (match_operand:DI 0 "register_operand" "q,r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "j%?%*\\t[%0]"
+ [(set_attr "type" "jump")
+ (set_attr "length" "2,4")])
+
+;; Combiner patterns used to match bbit0/1 instructions.
+;; Unfortunately, we cannot use splitting for this pattern as the
+;; insn length is known only very late in the compilation process.
+(define_insn "*bbit_and"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(and:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (match_operand 2 "bbitimm_operand" ""))
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC_ZN CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && (TARGET_BBIT || reload_completed)"
+ {
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ return (GET_CODE (operands[3]) == EQ
+ ? \"bbit0%*\\t%1,%2,%l0\" : \"bbit1%*\\t%1,%2,%l0\");
+ default:
+ return \"btst\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "bbit")
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (const_int 8)))])
+
+;; BBITx instructions need to be generated as late as possible.
+;; Hence, we need to postpone this until the 2nd peephole2 step.
+;; However, this may need an upstream change.
+
+;;(define_peephole2
+;; [(set (match_operand 0 "cc_register")
+;; (compare:CC_ZN (and:GPI (match_operand:GPI 1 "register_operand" "")
+;; (match_operand 2 "bbitimm_operand" ""))
+;; (const_int 0)))
+;; (set (pc) (if_then_else
+;; (match_operator 3 "equality_comparison_operator"
+;; [(match_dup 0) (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))]
+;; "(peephole2_instance == 1) && peep2_reg_dead_p (2, operands[0])"
+;; [(parallel
+;; [(set (pc)
+;; (if_then_else
+;; (match_op_dup 3 [(and:GPI (match_dup 1) (match_dup 2))
+;; (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))
+;; (clobber (reg:CC_ZN CC_REGNUM))])])
+
+(define_insn "*bbit_zext"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(zero_extract:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:GPI 2 "nonmemory_operand" "ir"))
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC_ZN CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && (TARGET_BBIT || reload_completed)"
+ {
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ return (GET_CODE (operands[3]) == EQ
+ ? \"bbit0%*\\t%1,%2,%l0\" : \"bbit1%*\\t%1,%2,%l0\");
+ default:
+ return \"btst\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "bbit")
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (const_int 8)))])
+
+;;(define_peephole2
+;; [(set (match_operand 0 "cc_register")
+;; (compare:CC_ZN (zero_extract:GPI
+;; (match_operand:GPI 1 "register_operand" "")
+;; (const_int 1)
+;; (match_operand:GPI 2 "nonmemory_operand" ""))
+;; (const_int 0)))
+;; (set (pc) (if_then_else
+;; (match_operator 3 "equality_comparison_operator"
+;; [(match_dup 0) (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))]
+;; "(peephole2_instance == 1) && peep2_reg_dead_p (2, operands[0])"
+;; [(parallel
+;; [(set (pc)
+;; (if_then_else
+;; (match_op_dup 3 [(zero_extract:GPI
+;; (match_dup 1) (const_int 1) (match_dup 2))
+;; (const_int 0)])
+;; (label_ref (match_operand 4 "" ""))
+;; (pc)))
+;; (clobber (reg:CC_ZN CC_REGNUM))])])
+
+;; Combiner/instruction pattern for BRcc instructions. We consider
+;; all comparisons supported by BRcc except compares with zero. The
+;; positive branch range needs to take into account the limm size and
+;; the pcl rounding. This pattern is guarded by an option as it may
+;; prohibit further optimizations like if-conversion.
+(define_insn "*brcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "brcc_comparison_operator"
+ [(match_operand:GPI 1 "register_operand" "q, r,r")
+ (match_operand:GPI 2 "nonmemory_operand" "U0000,U06S0r,S32S0")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && (TARGET_BRCC || reload_completed)"
+ {
+ switch (get_attr_length (insn))
+ {
+ case 2:
+ return \"br%m3_s\\t%1,%2,%l0\";
+ case 4:
+ case 8:
+ return \"br%m3%*\\t%1,%2,%l0\";
+ default:
+ return \"cmp\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "brcc")
+ (set (attr "length")
+ (cond [(and (match_operand 3 "equality_comparison_operator" "")
+ (ge (minus (match_dup 0) (pc)) (const_int -126))
+ (le (minus (match_dup 0) (pc)) (const_int 122))
+ (eq (symbol_ref "which_alternative") (const_int 0))
+ ;; no delay slot for short version.
+ (eq_attr "delay_slot_filled" "no")
+ (ior (and (match_operand:DI 1 "" "")
+ (match_test "TARGET_64BIT"))
+ (and (match_operand:SI 1 "" "")
+ (match_test "!TARGET_64BIT"))))
+ (const_int 2)
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (ior (eq (symbol_ref "which_alternative") (const_int 0))
+ (eq (symbol_ref "which_alternative") (const_int 1))))
+ (const_int 4)
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (eq_attr "delay_slot_filled" "no")
+ (eq (symbol_ref "which_alternative") (const_int 2)))
+ (const_int 8)
+ ;; This should be variable as well...
+ (eq (symbol_ref "which_alternative") (const_int 1))
+ (const_int 12)]
+ (const_int 12)))
+ ])
+
+;; BRcc is not complete; emulate the missing variants:
+;; brgt rb,rc,label => brlt rc,rb,label
+;; brgt rb,u6,label => brge rb,u6+1,label
+;; brhi rb,rc,label => brlo rc,rb,label
+;; brhi rb,u6,label => brhs rb,u6+1,label
+;; brle rb,rc,label => brge rc,rb,label
+;; brle rb,u6,label => brlt rb,u6+1,label
+;; brls rb,rc,label => brhs rc,rb,label
+;; brls rb,u6,label => brlo rb,u6+1,label
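+;; For example, "brgt r0,7,label" is emitted as "brge r0,8,label";
+;; the U06M1 constraint presumably restricts the immediate so that
+;; the incremented value still fits in the u6 field.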
+(define_insn "*emu_brcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "ebrcc_comparison_operator"
+ [(match_operand:GPI 1 "register_operand" "r,r,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "U06M1,r,n")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "!CROSSING_JUMP_P (insn) && reload_completed"
+ {
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ case 8:
+ if (which_alternative == 0)
+ {
+ return \"br%w3%*\\t%1,%2 + 1,%l0\";
+ }
+ return \"br%W3%*\\t%2,%1,%l0\";
+ default:
+ return \"cmp\\t%1,%2\\n\\tb%m3%*\\t%l0\";
+ }
+ }
+ [(set_attr "type" "brcc")
+ (set (attr "length")
+ (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (ior (eq (symbol_ref "which_alternative") (const_int 0))
+ (eq (symbol_ref "which_alternative") (const_int 1))))
+ (const_int 4)
+ (and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc)) (const_int 244))
+ (eq_attr "delay_slot_filled" "no")
+ (eq (symbol_ref "which_alternative") (const_int 2)))
+ (const_int 8)]
+ (const_int 12)))
+ ])
+
+;; Peephole pattern for matching BRcc instructions.
+(define_peephole2
+ [(set (match_operand 0 "cc_register")
+ (compare:CC (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))
+ (set (pc) (if_then_else
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 4 ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (pc)
+ (if_then_else
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (label_ref (match_dup 4))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])])
+
+;; Similar to the one above, but for comparisons against zero.
+(define_peephole2
+ [(set (match_operand 0 "cc_register")
+ (compare:CC_ZN (match_operand:GPI 1 "register_operand")
+ (const_int 0)))
+ (set (pc) (if_then_else
+ (match_operator 2 "brcc_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (pc)
+ (if_then_else
+ (match_op_dup 2 [(match_dup 1) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])])
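+
+;; Both peepholes merge a compare followed by a conditional branch
+;; into a single compare-and-branch when the CC register dies, e.g.
+;; "cmp r0,r1" + "beq label" becomes "breq r0,r1,label" (illustrative;
+;; the actual output is chosen by the BRcc patterns above).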
+
+;; -------------------------------------------------------------------
+;; Sign/Zero extension
+;; -------------------------------------------------------------------
+
+(define_expand "sidi2"
+ [(set (match_operand:DI 0 "register_operand")
+ (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
+ "TARGET_64BIT"
+)
+
+(define_expand "2"
+ [(set (match_operand:GPI 0 "register_operand")
+ (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
+ ""
+)
+
+;; TODO: Commented this out to fix issues in dejagnu.
+;; NEEDS TO BE VERIFIED LATER ON.
+;; (define_expand "qihi2"
+;; [(set (match_operand:HI 0 "register_operand")
+;; (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
+;; ""
+;; )
+
+(define_insn "*zero_extendsi2"
+ [(set (match_operand:SI 0 "register_operand" "=q,r, q,r")
+ (zero_extend:SI
+ (match_operand:SHORT 1 "nonimmediate_operand" "q,r,Uldms,m")))]
+ ""
+ "@
+ ext_s\\t%0,%1
+ ext\\t%0,%1
+ ld_s\\t%0,%1
+ ld%U1\\t%0,%1"
+ [(set_attr "type" "sex,sex,ld,ld")
+ (set_attr "length" "2,4,2,*")])
+
+(define_insn "*zero_extenddi2"
+ [(set (match_operand:DI 0 "register_operand" "=r, q,r")
+ (zero_extend:DI
+ (match_operand:EXT 1 "nonimmediate_operand" "r,Uldms,m")))]
+ "TARGET_64BIT"
+ "@
+ bmskl\\t%0,%1,
+ ld_s\\t%0,%1
+ ld%U1\\t%0,%1"
+ [(set_attr "type" "and,ld,ld")
+ (set_attr "length" "4,2,*")]
+)
+
+(define_insn "*sign_extenddi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI
+ (match_operand:EXT 1 "nonimmediate_operand" "r,m")))]
+ "((!TARGET_VOLATILE_DI) || (!MEM_VOLATILE_P (operands[1])))
+ && TARGET_64BIT"
+ "@
+ sexl\\t%0,%1
+ ld.x%U1\\t%0,%1"
+ [(set_attr "type" "sex,ld")
+ (set_attr "length" "4,*")])
+
+(define_insn "*sign_extendsi2"
+ [(set (match_operand:SI 0 "register_operand" "=q,r,r")
+ (sign_extend:SI
+ (match_operand:SHORT 1 "nonimmediate_operand" "q,r,m")))]
+ ""
+ "@
+ sex_s\\t%0,%1
+ sex\\t%0,%1
+ ld.x%U1\\t%0,%1"
+ [(set_attr "type" "sex,sex,ld")
+ (set_attr "length" "2,4,8")])
+
+;; -------------------------------------------------------------------
+;; Simple arithmetic
+;; -------------------------------------------------------------------
+
+;; TODO: Allow symbols in LIMM field
+(define_expand "si3"
+ [(set (match_operand:SI 0 "register_operand")
+ (ADDSUB:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!register_operand (operands[1], SImode)
+ && !register_operand (operands[2], SImode))
+ {
+ if (!CONST_INT_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ else
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ })
+
+(define_expand "mul3"
+ [(set (match_operand:GPI 0 "register_operand")
+ (mult:GPI (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!register_operand (operands[2], <MODE>mode)
+ && !satisfies_constraint_S32S0 (operands[2]))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ })
+
+;; The overflow patterns are tested using expensive tests and dg-torture.exp
+(define_expand "addv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_add<mode>3_Vcmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "add3_Vcmp"
+ [(parallel
+ [(set
+ (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (plus:
+ (sign_extend: (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r"))
+ (sign_extend: (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0")))
+ (sign_extend: (plus:GPI (match_dup 1) (match_dup 2)))))
+ (set (match_operand:GPI 0 "register_operand" "= r, r,r, r, r")
+ (plus:GPI (match_dup 1) (match_dup 2)))])]
+ "register_operand (operands[1], mode)
+ || register_operand (operands[2], mode)"
+ "add.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8,8")
+ (set_attr "type" "add")])
+
+(define_expand "uaddv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_add<mode>3_Ccmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
+ DONE;
+ })
+
+(define_expand "subv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_sub<mode>3_Vcmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "sub3_Vcmp"
+ [(set
+ (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (sign_extend:
+ (minus:GPI
+ (match_operand:GPI 1 "arc64_nonmem_operand" " 0, r,r,S32S0, r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "S12S0,U06S0,r, r,S32S0")))
+ (minus: (sign_extend: (match_dup 1))
+ (sign_extend: (match_dup 2)))))
+ (set (match_operand:GPI 0 "register_operand" "= r, r,r, r, r")
+ (minus:GPI (match_dup 1) (match_dup 2)))]
+ "register_operand (operands[1], mode)
+ || register_operand (operands[2], mode)"
+ "sub.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8,8")
+ (set_attr "type" "sub")])
+
+(define_expand "negv3"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (label_ref (match_operand 2 "" ""))]
+ ""
+ {
+ emit_insn (gen_neg<mode>2_Vcmp (operands[0], operands[1]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]);
+ DONE;
+ })
+
+(define_insn "negsi2_Vcmp"
+ [(set (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (sign_extend:DI
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))
+ (neg:DI (sign_extend:DI (match_dup 1)))))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_dup 1)))]
+ ""
+ "neg.f\\t%0,%1"
+ [(set_attr "type" "neg")
+ (set_attr "length" "4")])
+
+(define_insn "negdi2_Vcmp"
+ [(set (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (sign_extend:TI
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (neg:TI (sign_extend:TI (match_dup 1)))))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_dup 1)))]
+ ""
+ "rsubl.f\\t%0,%1,0"
+ [(set_attr "type" "neg")
+ (set_attr "length" "4")])
+
+(define_expand "usubv4"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_sub<mode>3_cmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
+ DONE;
+ })
+
+(define_expand "mulvsi4"
+ [(ANY_EXTEND:DI (match_operand:SI 0 "register_operand"))
+ (ANY_EXTEND:DI (match_operand:SI 1 "register_operand"))
+ (ANY_EXTEND:DI (match_operand:SI 2 "register_operand"))
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ {
+ emit_insn (gen_<su_optab>mulsi3_Vcmp (operands[0], operands[1], operands[2]));
+ arc64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "mulsi3_Vcmp"
+ [(parallel
+ [(set
+ (reg:CC_V CC_REGNUM)
+ (compare:CC_V
+ (mult:DI
+ (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0, r,r, r"))
+ (ANY_EXTEND:DI (match_operand:SI 2 "arc64_nonmem_operand" "S12S0,U06S0,r,S32S0")))
+ (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2)))))
+ (set (match_operand:SI 0 "register_operand" "=r, r,r, r")
+ (mult:SI (match_dup 1) (match_dup 2)))])]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "mpy.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "mpy")])
+
+;; -------------------------------------------------------------------
+;; Comparison insns
+;; -------------------------------------------------------------------
+
+(define_expand "cmp"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:GPI 0 "register_operand" "")
+ (match_operand:GPI 1 "nonmemory_operand" "")))]
+ ""
+ {
+ if (!register_operand (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+ })
+
+(define_insn "*cmp"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:GPI 0 "nonmemory_operand" " q, qh,r, r, r,U06S0,S12S0,S32S0,r")
+ (match_operand:GPI 1 "nonmemory_operand" "qh,S03MV,r,U06S0,S12S0, r, r, r,S32S0")))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ cmp%?\\t%0,%1
+ rcmp%?\\t%1,%0
+ rcmp%?\\t%1,%0
+ rcmp%?\\t%1,%0
+ cmp%?\\t%0,%1"
+ [(set_attr "type" "cmp")
+ (set_attr "iscompact" "maybe,maybe,no,no,no,no,no,no,no")
+ (set_attr "predicable" "no,no,yes,yes,no,yes,no,no,no")
+ (set_attr "length" "*,*,4,4,4,4,4,8,8")])
+
+
+(define_insn "*cmp_ce"
+ [(cond_exec
+ (match_operator 2 "arc64_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:GPI 0 "nonmemory_operand" "r, r,U06S0,S32S0,r")
+ (match_operand:GPI 1 "nonmemory_operand" "r,U06S0, r, r,S32S0"))))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ cmp.%m2\\t%0,%1
+ cmp.%m2\\t%0,%1
+ rcmp.%m2\\t%1,%0
+ rcmp.%m2\\t%1,%0
+ cmp.%m2\\t%0,%1"
+ [(set_attr "type" "cmp")
+ (set_attr "length" "4,4,4,8,8")])
+
+(define_insn "*cmp_zn"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (match_operand:GPI 0 "register_operand" "q,r")
+ (const_int 0)))]
+ ""
+ "tst%?\\t%0,%0"
+ [(set_attr "type" "tst")
+ (set_attr "iscompact" "maybe,no")
+ (set_attr "length" "*,4")])
+
+(define_insn "*cmp_znce"
+ [(cond_exec
+ (match_operator 2 "arc64_comparison_operator"
+ [(match_operand 1 "cc_register" "") (const_int 0)])
+ (set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN (match_operand:GPI 0 "register_operand" "r")
+ (const_int 0))))]
+ ""
+ "tst.%m2\\t%0,%0"
+ [(set_attr "type" "tst")
+ (set_attr "length" "4")])
+
+(define_insn "fcmp"
+ [(set (reg:CC_FPU CC_REGNUM)
+ (compare:CC_FPU (match_operand:GPF_HF 0 "register_operand" "w")
+ (match_operand:GPF_HF 1 "register_operand" "w")))]
+ "ARC64_HAS_FP_BASE"
+ "fcmp\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fcmp")])
+
+(define_insn "fcmpf"
+ [(set (reg:CC_FPUE CC_REGNUM)
+ (compare:CC_FPUE (match_operand:GPF_HF 0 "register_operand" "w")
+ (match_operand:GPF_HF 1 "register_operand" "w")))]
+ "ARC64_HAS_FP_BASE"
+ "fcmpf\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fcmp")])
+
+;; -------------------------------------------------------------------
+;; Store-flag and conditional select insns
+;; -------------------------------------------------------------------
+
+(define_expand "cstore4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "arc64_comparison_operator"
+ [(match_operand:GPI 2 "nonmemory_operand")
+ (match_operand:GPI 3 "nonmemory_operand")]))]
+ ""
+ {
+ if (!register_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ if (!arc64_nonmem_operand (operands[3], <MODE>mode))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ })
+
+(define_expand "cstore4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "arc64_comparison_operator"
+ [(match_operand:GPF_HF 2 "register_operand")
+ (match_operand:GPF_HF 3 "register_operand")]))]
+ "ARC64_HAS_FP_BASE"
+ "
+ operands[2] = arc64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+ operands[3]);
+ operands[3] = const0_rtx;
+ "
+)
+
+(define_insn_and_split "*scc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "arc64_comparison_operator"
+ [(reg CC_REGNUM) (const_int 0)]))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (cond_exec
+ (match_dup 1)
+ (set (match_dup 0) (const_int 1)))]
+{
+ operands[1]
+ = gen_rtx_fmt_ee (GET_CODE (operands[1]),
+ VOIDmode,
+ XEXP (operands[1], 0), XEXP (operands[1], 1));
+}
+ [(set_attr "type" "movecc")])
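+;; The split above materializes a comparison result without a branch:
+;; the destination is first cleared and then conditionally set to 1
+;; under the original condition, e.g. "mov r0,0" followed by a
+;; predicated "mov.eq r0,1" (illustrative output).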
+
+;; SETcc instructions
+(define_expand "set"
+ [(set (match_operand:SI 0 "register_operand")
+ (ALLCC:SI
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!arc64_nonmem_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ })
+
+(define_insn "*set"
+ [(set (match_operand:SI 0 "register_operand" "=r, r, r,r")
+ (SETCC:SI
+ (match_operand:GPI 1 "register_operand" "r, r, 0,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,n")))]
+ ""
+ "set%?\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "setcc")])
+
+(define_insn "*set_cmp"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:GPI 1 "register_operand" "r, r, 0,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06S0,S12S0,n")))
+ (set (match_operand:SI 0 "register_operand" "=r, r, r,r")
+ (SETCC:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "set.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "setcc")])
+
+;; Special cases of SETCC
+(define_insn_and_split "*sethi"
+ [(set (match_operand:SI 0 "register_operand" "=r, r,r")
+ (gtu:SI
+ (match_operand:GPI 1 "register_operand" "r, r,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06M1,n")))]
+ ""
+ "setlo%?\\t%0,%2,%1"
+ "reload_completed
+ && CONST_INT_P (operands[2])
+ && satisfies_constraint_U06M1 (operands[2])"
+ [(const_int 0)]
+ "{
+ /* sethi a,b,u6 => seths a,b,u6 + 1. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ emit_insn (gen_setgeu (operands[0], operands[1], operands[2]));
+ DONE;
+ }"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "setcc")])
+
+(define_insn_and_split "*setls"
+ [(set (match_operand:SI 0 "register_operand" "=r, r,r")
+ (leu:SI
+ (match_operand:GPI 1 "register_operand" "r, r,r")
+ (match_operand:GPI 2 "arc64_nonmem_operand" "r,U06M1,n")))]
+ ""
+ "seths%?\\t%0,%2,%1"
+ "reload_completed
+ && satisfies_constraint_U06M1 (operands[2])"
+ [(const_int 0)]
+ "{
+ /* setls a,b,u6 => setlo a,b,u6 + 1. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ emit_insn (gen_setltu (operands[0], operands[1], operands[2]));
+ DONE;
+ }"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "setcc")])
+
+;; MOVCC patterns
+(define_expand "movcc"
+ [(set (match_operand:ALLI 0 "register_operand")
+ (if_then_else:ALLI (match_operand 1 "arc64_comparison_operator")
+ (match_operand:ALLI 2 "register_operand")
+ (match_operand:ALLI 3 "register_operand")))]
+ ""
+ {
+ rtx tmp;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ })
+
+(define_expand "movcc"
+ [(set (match_operand:GPF_HF 0 "register_operand")
+ (if_then_else:GPF_HF (match_operand 1 "arc64_comparison_operator")
+ (match_operand:GPF_HF 2 "register_operand")
+ (match_operand:GPF_HF 3 "register_operand")))]
+ ""
+ {
+ rtx tmp;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ })
+
+(define_insn "*cmov"
+ [(set (match_operand:ALLI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:ALLI
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:ALLI 1 "nonmemory_operand" "0,0,rU06S0,S32S0")
+ (match_operand:ALLI 2 "nonmemory_operand" "rU06S0,S32S0,0,0")
+ ))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ mov.%M3\\t%0,%2
+ mov.%M3\\t%0,%2
+ mov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1"
+ [(set_attr "length" "4,8,4,8")
+ (set_attr "type" "move")])
+
+(define_insn "*cmov"
+ [(set (match_operand:HF_SF 0 "register_operand" "=w,*r,*r,w,*r,*r")
+ (if_then_else:HF_SF
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:HF_SF 1 "nonmemory_operand" "w,*r,*E,0, 0, 0")
+ (match_operand:HF_SF 2 "nonmemory_operand" "0, 0, 0,w,*r,*E")))]
+ "register_operand (operands[0], mode)
+ || register_operand (operands[1], mode)"
+ "@
+ fmov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1
+ mov.%m3\\t%0,%1
+ fmov.%M3\\t%0,%2
+ mov.%M3\\t%0,%2
+ mov.%M3\\t%0,%2"
+ [(set_attr "length" "4,4,8,4,4,8")
+ (set_attr "type" "fmov,move,move,fmov,move,move")])
+
+(define_insn "*cmovdf"
+ [(set (match_operand:DF 0 "register_operand" "=w,*r,w,*r")
+ (if_then_else:DF
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 1 "register_operand" "w,*r,0, 0")
+ (match_operand:DF 2 "register_operand" "0, 0,w,*r")))]
+ "ARC64_HAS_FPUD"
+ "@
+ fdmov.%m3\\t%0,%1
+ movl.%m3\\t%0,%1
+ fdmov.%M3\\t%0,%2
+ movl.%M3\\t%0,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "fmov,move,fmov,move")])
+
+;; -------------------------------------------------------------------
+;; Logical operations
+;; -------------------------------------------------------------------
+
+(define_expand "3"
+ [(set (match_operand:GPI 0 "register_operand")
+ (LOGIC:GPI (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ ""
+ {
+ if (!arc64_nonmem_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ })
+
+(define_expand "2"
+ [(set (match_operand:GPI 0 "register_operand")
+ (NOT_ABS:GPI (match_operand:GPI 1 "register_operand")))]
+ ""
+ )
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=q,q,r,r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,q,0,r")))]
+ ""
+ "neg%?\\t%0,%1"
+ [(set_attr "type" "neg")
+ (set_attr "iscompact" "maybe,yes,no,no")
+ (set_attr "predicable" "yes,no,yes,no")
+ (set_attr "length" "*,2,4,4")])
+
+(define_insn "*2"
+ [(set (match_operand:GPI 0 "register_operand" "=q,r")
+ (NOT_ABS:GPI (match_operand:GPI 1 "register_operand" "q,r")))]
+ ""
+ "%?\\t%0,%1"
+ [(set_attr "type" "")
+ (set_attr "iscompact" "maybe,no")
+ (set_attr "length" "*,4")])
+
+(define_insn "*3"
+ [(set (match_operand:GPI 0 "register_operand" "=r, r, r,r")
+ (MINMAX:GPI (match_operand:GPI 1 "register_operand" "%0, 0, r,r")
+ (match_operand:GPI 2 "nonmemory_operand" "rU06S0,S12S0,rU06S0,S32S0")))]
+ ""
+ "%?\\t%0,%1,%2"
+ [(set_attr "type" "")
+ (set_attr "length" "4,4,4,8")
+ (set_attr "predicable" "yes,no,no,no")]
+)
+
+;; Zero-extend pattern
+(define_insn "*si_zextend"
+ [(set (match_operand:DI 0 "register_operand" "=q,r")
+ (zero_extend:DI
+ (LOP2EX:SI (match_operand:SI 1 "register_operand" "q,r"))))]
+ "TARGET_64BIT"
+ "%?\\t%0,%1"
+ [(set_attr "type" "")
+ (set_attr "iscompact" "yes,no")
+ (set_attr "length" "*,4")])
+
+(define_insn "*3_zextend"
+ [(set (match_operand:DI 0 "register_operand" "=r, r, r,r")
+ (zero_extend:DI
+ (MINMAX:SI
+ (match_operand:SI 1 "register_operand" "%0, 0, r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rU06S0,S12S0,rU06S0,S32S0"))))]
+ "TARGET_64BIT"
+ "%?\\t%0,%1,%2"
+ [(set_attr "type" "max")
+ (set_attr "length" "4,4,4,8")
+ (set_attr "predicable" "yes,no,no,no")])
+
+;; NEGCC and NOTCC patterns used by ifcvt.
+(define_expand "cc"
+ [(set (match_operand:GPI 0 "register_operand")
+ (if_then_else:GPI (match_operand 1 "arc64_comparison_operator")
+ (NEG_NOT:GPI (match_operand:GPI 2 "register_operand"))
+ (match_operand:GPI 3 "register_operand")))]
+ ""
+ {
+ rtx tmp;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ tmp = arc64_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ })
+
+(define_insn "*cneg"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
+ (if_then_else:GPI
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (neg:GPI (match_operand:GPI 1 "register_operand" "0,0,0"))
+ (match_operand:GPI 2 "nonmemory_operand" "0,rU06S0,S32S0")))]
+ ""
+ "@
+ rsub.%m3\\t%0,%1,0
+ rsub.%m3\\t%0,%1,0\\n\\tmov.%M3\\t%0,%2
+ rsub.%m3\\t%0,%1,0\\n\\tmov.%M3\\t%0,%2"
+ [(set_attr "length" "4,8,12")
+ (set_attr "type" "neg")])
+
+(define_insn "*cnot"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r,r")
+ (if_then_else:GPI
+ (match_operator 3 "arc64_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (not:GPI (match_operand:GPI 1 "register_operand" "0,0,0"))
+ (match_operand:GPI 2 "register_operand" "0,rU06S0,S32S0")))]
+ ""
+ "@
+ xor.%m3\\t%0,%1,-1
+ xor.%m3\\t%0,%1,-1\\n\\tmov.%M3\\t%0,%2
+ xor.%m3\\t%0,%1,-1\\n\\tmov.%M3\\t%0,%2"
+ [(set_attr "length" "8,12,16")
+ (set_attr "type" "xor")])
+
+;; -------------------------------------------------------------------
+;; Shifts
+;; -------------------------------------------------------------------
+
+;; FIXME! check if we get better code if we use QI for op 2.
+(define_expand "3"
+ [(set (match_operand:GPI 0 "register_operand")
+ (ASHIFT:GPI (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "nonmemory_operand")))]
+ "")
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "nonmemory_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ "")
+
+(define_insn "*rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r, r, r, r,r")
+ (rotatert:SI (match_operand:SI 1 "nonmemory_operand" "r, r, r, r,i")
+ (match_operand:SI 2 "nonmemory_operand" "U0001,U0008,U0016,rU06S0,r")))]
+ ;; FIXME! This needs the BARREL_SHIFTER option.
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "@
+ ror\\t%0,%1
+ ror8\\t%0,%1
+ swap\\t%0,%1
+ ror\\t%0,%1,%2
+ ror\\t%0,%1,%2"
+ [(set_attr "type" "ror,ror,swap,ror,ror")
+ (set_attr "length" "4,4,4,4,8")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "nonmemory_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ ""
+ "
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 1))
+ {
+ emit_insn (gen_rotl1 (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 8))
+ {
+ emit_insn (gen_rotl8 (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32);
+ else
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2]));
+ operands[2] = reg;
+ }
+ ")
+
+(define_insn "rotl1"
+ [(set (match_operand:SI 0 "register_operand" "= r,r")
+ (rotate:SI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i")
+ (const_int 1)))]
+ ""
+ "rol%?\\t%0,%1"
+ [(set_attr "type" "rol")
+ (set_attr "predicable" "no")
+ (set_attr "length" "4,8")])
+
+(define_insn "rotl8"
+ [(set (match_operand:SI 0 "register_operand" "= r,r")
+ (rotate:SI (match_operand:SI 1 "nonmemory_operand" "rU06S0,i")
+ (const_int 8)))]
+ ""
+ "rol8%?\\t%0,%1"
+ [(set_attr "type" "rol")
+ (set_attr "predicable" "no")
+ (set_attr "length" "4,8")])
+
+
+;; -------------------------------------------------------------------
+;; Bitfields
+;; -------------------------------------------------------------------
+
+(define_expand "extzv"
+ [(set (match_operand:GPI 0 "register_operand" "")
+ (zero_extract:GPI (match_operand:GPI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "")
+
+(define_insn "*extzvsi"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n")))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[3]) & 0x1f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbfu%?\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "iscompact" "no")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "no")])
+
+(define_insn "*zextzvsi"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extract:DI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n")))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[3]) & 0x1f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbfu%?\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "iscompact" "no")
+ (set_attr "length" "4,8")
+ (set_attr "predicable" "no")])
+
+;; FIXME! Compute the length based on the input args.
+(define_insn "*extzvdi"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n")))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x3f) << 6)
+ | (INTVAL (operands[3]) & 0x3f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbful%?\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "iscompact" "no")
+ (set_attr "length" "8,8")
+ (set_attr "predicable" "no")])
+
+(define_insn "*extzvsi_cmp0"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN
+ (zero_extract:SI
+ (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand 2 "const_int_operand" "n,n")
+ (match_operand 3 "const_int_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[2]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[3]) & 0x1f);
+ operands[2] = GEN_INT (assemble_op2);
+ return "xbfu.f\\t%0,%1,%2";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "length" "4,8")])
+
+(define_insn "*extzvsi_cmp0_noout"
+ [(set (reg:CC_ZN CC_REGNUM)
+ (compare:CC_ZN
+ (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)))]
+ ""
+ {
+ int assemble_op2 = (((INTVAL (operands[1]) - 1) & 0x1f) << 5)
+ | (INTVAL (operands[2]) & 0x1f);
+ operands[1] = GEN_INT (assemble_op2);
+ return "xbfu.f\\t0,%0,%1";
+ }
+ [(set_attr "type" "xbfu")
+ (set_attr "length" "8")])
+
+(define_insn "bswap2"
+ [(set (match_operand:GPI 0 "register_operand" "=r,r")
+ (bswap:GPI
+ (match_operand:GPI 1 "nonmemory_operand" "rU06S0,S32S0")))]
+ ""
+ "swape\\t%0,%1"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "swap")])
+
+;; -------------------------------------------------------------------
+;; Bitscan
+;; -------------------------------------------------------------------
+
+(define_insn "clrsb2"
+ [(set (match_operand:EPI 0 "register_operand" "=r")
+ (clrsb:EPI (match_operand:EPI 1 "register_operand" "r")))]
+ "TARGET_BITSCAN"
+ "norm\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "norm")])
+
+(define_expand "clz2"
+ [(match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")]
+ "TARGET_BITSCAN"
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ unsigned int size = GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1;
+ emit_insn (gen_arc64_fls<mode>2 (tmp, operands[1]));
+ emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (size), tmp));
+ DONE;
+ })
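+;; clz is synthesized from fls, which is assumed here to return the
+;; bit index of the most significant set bit, so
+;; clz (x) = (bits - 1) - fls (x).  E.g. for SImode,
+;; fls (0x00008000) = 15 and clz = 31 - 15 = 16.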
+
+(define_insn "ctz2"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))]
+ "TARGET_BITSCAN"
+ "ffs\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "ffs")])
+
+(define_insn "arc64_fls2"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")]
+ ARC64_UNSPEC_FLS))]
+ "TARGET_BITSCAN"
+ "fls\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fls")])
+
+;; -------------------------------------------------------------------
+;; Floating-point intrinsics
+;; -------------------------------------------------------------------
+
+(define_insn "round2"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ ARC64_UNSPEC_ROUND))]
+ "ARC64_HAS_FP_BASE"
+ "frnd\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "frnd")])
+
+(define_insn "btrunc2"
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
+ ARC64_UNSPEC_BTRUNC))]
+ "ARC64_HAS_FP_BASE"
+ "frnd_rz\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "frnd")])
+
+;; -------------------------------------------------------------------
+;; Floating-point conversions
+;; -------------------------------------------------------------------
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=w")
+ (float_extend:DF (match_operand:SF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUD"
+ "fs2d\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fs2d")])
+
+(define_insn "extendhfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (float_extend:SF (match_operand:HF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUH"
+ "fh2s\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fh2s")])
+
+(define_expand "extendhfdf2"
+ [(match_operand:DF 0 "register_operand")
+ (match_operand:HF 1 "register_operand")]
+ "ARC64_HAS_FPUS"
+ {
+ rtx tmp = gen_reg_rtx (SFmode);
+ emit_insn (gen_extendhfsf2 (tmp, operands[1]));
+ if (ARC64_HAS_FPUD)
+ emit_insn (gen_extendsfdf2 (operands[0], tmp));
+ else
+ {
+ rtx ret;
+ ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode,
+ "__extendsfdf2"),
+ operands[0], LCT_NORMAL, DFmode,
+ tmp, SFmode);
+ if (ret != operands[0])
+ emit_move_insn (operands[0], ret);
+ }
+ DONE;
+ })
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUD"
+ "fd2s\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fd2s")])
+
+(define_insn "truncsfhf2"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+ (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))]
+ "ARC64_HAS_FPUH"
+ "fs2h\\t%0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fs2h")])
+
+(define_expand "truncdfhf2"
+ [(match_operand:HF 0 "register_operand")
+ (match_operand:DF 1 "register_operand")]
+ "ARC64_HAS_FPUS"
+ {
+ rtx tmp = gen_reg_rtx (SFmode);
+ if (ARC64_HAS_FPUD)
+ emit_insn (gen_truncdfsf2 (tmp, operands[1]));
+ else
+ {
+ rtx ret;
+ ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode,
+ "__truncdfsf2"),
+ tmp, LCT_NORMAL, SFmode,
+ operands[1], DFmode);
+ if (ret != tmp)
+ emit_move_insn (tmp, ret);
+ }
+ emit_insn (gen_truncsfhf2 (operands[0], tmp));
+ DONE;
+ })
+
+;; SI->SF SI->DF DI->SF DI->DF
+;; FINT2S FINT2D FL2S FL2D
+(define_insn "float