author    Jing Yu <jingyu@google.com>  2011-01-30 22:18:29 -0800
committer Jing Yu <jingyu@google.com>  2011-01-30 22:18:29 -0800
commit    4a66e756636cb8364582ea503abd10d76f5b4aa3 (patch)
tree      9660204ec085888a0601a6460c967b204a63d5f3 /gcc-4.4.3/gcc/config
parent    b6be42e837844cce5283f42fcfac31e6d66a277d (diff)
Upgrade gcc-4.4.3 for Android toolchain.
- Backport upstream patches to support ARM hardfp.
- Backport gcc-4.5 patches to support -march=atom. An Atom toolchain can
  now be built with the glibc from this branch.
- Add a number of optimizations.
- Fix a few ARM DejaGnu failures.

To-do list:
- Support Android/atom.
- Fix the ia32 bootstrap failure.

Change-Id: I5e10dcd21620d4d8ca984d1d1707a76067e61691
Diffstat (limited to 'gcc-4.4.3/gcc/config')
-rw-r--r--  gcc-4.4.3/gcc/config/arm/arm-protos.h     |    4
-rw-r--r--  gcc-4.4.3/gcc/config/arm/arm.c            | 1197
-rw-r--r--  gcc-4.4.3/gcc/config/arm/arm.h            |   99
-rw-r--r--  gcc-4.4.3/gcc/config/arm/arm.md           |   22
-rw-r--r--  gcc-4.4.3/gcc/config/arm/bpabi.h          |   14
-rw-r--r--  gcc-4.4.3/gcc/config/arm/constraints.md   |   21
-rw-r--r--  gcc-4.4.3/gcc/config/arm/linux-eabi.h     |    1
-rw-r--r--  gcc-4.4.3/gcc/config/arm/t-arm-elf        |    7
-rw-r--r--  gcc-4.4.3/gcc/config/arm/thumb2.md        |   57
-rw-r--r--  gcc-4.4.3/gcc/config/i386/atom.md         |  796
-rw-r--r--  gcc-4.4.3/gcc/config/i386/cpuid.h         |    2
-rw-r--r--  gcc-4.4.3/gcc/config/i386/cygming.h       |    4
-rw-r--r--  gcc-4.4.3/gcc/config/i386/driver-i386.c   |   23
-rw-r--r--  gcc-4.4.3/gcc/config/i386/i386-c.c        |    9
-rw-r--r--  gcc-4.4.3/gcc/config/i386/i386-protos.h   |    8
-rw-r--r--  gcc-4.4.3/gcc/config/i386/i386.c          |  912
-rw-r--r--  gcc-4.4.3/gcc/config/i386/i386.h          |   22
-rw-r--r--  gcc-4.4.3/gcc/config/i386/i386.md         |  219
-rw-r--r--  gcc-4.4.3/gcc/config/i386/i386.opt        |   12
-rw-r--r--  gcc-4.4.3/gcc/config/i386/linux-unwind.h  |    2
-rw-r--r--  gcc-4.4.3/gcc/config/i386/linux.h         |    2
-rw-r--r--  gcc-4.4.3/gcc/config/i386/linux64.h       |   10
-rw-r--r--  gcc-4.4.3/gcc/config/i386/lwpintrin.h     |  100
-rw-r--r--  gcc-4.4.3/gcc/config/i386/mingw32.h       |    2
-rw-r--r--  gcc-4.4.3/gcc/config/i386/ppro.md         |    8
-rw-r--r--  gcc-4.4.3/gcc/config/i386/sse.md          |   62
-rw-r--r--  gcc-4.4.3/gcc/config/i386/ssemath.h       |   25
-rw-r--r--  gcc-4.4.3/gcc/config/i386/x86intrin.h     |    4
-rw-r--r--  gcc-4.4.3/gcc/config/linux-android.h      |    6
-rw-r--r--  gcc-4.4.3/gcc/config/linux.h              |    8
-rw-r--r--  gcc-4.4.3/gcc/config/mips/linux64.h       |    4
-rw-r--r--  gcc-4.4.3/gcc/config/rs6000/linux64.h     |    2
-rw-r--r--  gcc-4.4.3/gcc/config/rs6000/sysv4.h       |    7
-rw-r--r--  gcc-4.4.3/gcc/config/sparc/sparc.c        |   10
34 files changed, 3345 insertions, 336 deletions
diff --git a/gcc-4.4.3/gcc/config/arm/arm-protos.h b/gcc-4.4.3/gcc/config/arm/arm-protos.h
index 8d03141e4..ee0a34393 100644
--- a/gcc-4.4.3/gcc/config/arm/arm-protos.h
+++ b/gcc-4.4.3/gcc/config/arm/arm-protos.h
@@ -154,13 +154,15 @@ extern bool arm_output_addr_const_extra (FILE *, rtx);
#if defined TREE_CODE
extern rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
+extern void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
extern bool arm_pad_arg_upward (enum machine_mode, const_tree);
extern bool arm_pad_reg_upward (enum machine_mode, tree, int);
extern bool arm_needs_doubleword_align (enum machine_mode, tree);
-extern rtx arm_function_value(const_tree, const_tree);
#endif
extern int arm_apply_result_size (void);
+extern rtx aapcs_libcall_value (enum machine_mode);
#endif /* RTX_CODE */
diff --git a/gcc-4.4.3/gcc/config/arm/arm.c b/gcc-4.4.3/gcc/config/arm/arm.c
index 2e2317b50..80bd3af3a 100644
--- a/gcc-4.4.3/gcc/config/arm/arm.c
+++ b/gcc-4.4.3/gcc/config/arm/arm.c
@@ -43,6 +43,7 @@
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
+#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
@@ -54,6 +55,7 @@
#include "langhooks.h"
#include "df.h"
#include "libfuncs.h"
+#include "params.h"
/* Forward definitions of types. */
typedef struct minipool_node Mnode;
@@ -111,6 +113,7 @@ static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
@@ -124,6 +127,10 @@ static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
+static bool arm_return_in_memory (const_tree, const_tree);
+static rtx arm_function_value (const_tree, const_tree, bool);
+static rtx arm_libcall_value (enum machine_mode, rtx);
+
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
@@ -150,6 +157,9 @@ static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
+static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
+ const_tree);
+static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
@@ -203,8 +213,8 @@ static rtx arm_pic_static_addr (rtx orig, rtx reg);
static rtx arm_get_pic_reg (void);
static void arm_clear_pic_reg (void);
static bool arm_can_simplify_got_access (int, int);
-static rtx arm_loaded_global_var (rtx, rtx *);
-static void arm_load_global_address (rtx, rtx, rtx, rtx);
+static rtx arm_loaded_global_var (rtx, rtx *, rtx *);
+static void arm_load_global_address (rtx, rtx, rtx, rtx, rtx);
/* Initialize the GCC target structure. */
@@ -264,6 +274,12 @@ static void arm_load_global_address (rtx, rtx, rtx, rtx);
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arm_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE arm_libcall_value
+
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
@@ -621,6 +637,8 @@ static int after_arm_reorg = 0;
/* The maximum number of insns to be used when loading a constant. */
static int arm_constant_limit = 3;
+static enum arm_pcs arm_pcs_default;
+
/* For an explanation of these variables, see final_prescan_insn below. */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
@@ -1536,9 +1554,6 @@ arm_override_options (void)
else
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
- sorry ("-mfloat-abi=hard and VFP");
-
/* FPA and iWMMXt are incompatible because the insn encodings overlap.
VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
will ever exist. GCC makes no attempt to support this combination. */
@@ -1553,6 +1568,28 @@ arm_override_options (void)
if (TARGET_SOFT_FLOAT)
arm_fpu_arch = FPUTYPE_NONE;
+ if (TARGET_AAPCS_BASED)
+ {
+ if (arm_abi == ARM_ABI_IWMMXT)
+ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
+ else if (arm_float_abi == ARM_FLOAT_ABI_HARD
+ && TARGET_HARD_FLOAT
+ && TARGET_VFP)
+ arm_pcs_default = ARM_PCS_AAPCS_VFP;
+ else
+ arm_pcs_default = ARM_PCS_AAPCS;
+ }
+ else
+ {
+ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
+ sorry ("-mfloat-abi=hard and VFP");
+
+ if (arm_abi == ARM_ABI_APCS)
+ arm_pcs_default = ARM_PCS_APCS;
+ else
+ arm_pcs_default = ARM_PCS_ATPCS;
+ }
+
/* For arm2/3 there is no need to do any scheduling if there is only
a floating point emulator, or we are doing software floating-point. */
if ((TARGET_SOFT_FLOAT
@@ -1682,6 +1719,16 @@ arm_override_options (void)
max_insns_skipped = 3;
}
+ /* Hot/Cold partitioning is not currently supported, since we can't
+ handle literal pool placement in that case. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition not supported on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
/* Ideally we would want to use CFI directives to generate
debug info. However this also creates the .eh_frame
section, so disable them until GAS can handle
@@ -1689,6 +1736,21 @@ arm_override_options (void)
if (TARGET_AAPCS_BASED)
flag_dwarf2_cfi_asm = 0;
+ if (!PARAM_SET_P (PARAM_INLINE_CALL_COST))
+ set_param_value ("inline-call-cost", 2);
+
+ if (!PARAM_SET_P (PARAM_INLINE_FUNCTION_SIZE_ADJUSTMENT))
+ set_param_value ("inline-function-size-adjustment", 3);
+
+ if (!PARAM_SET_P (PARAM_INLINE_ADDRESS_TAKEN_FUNCTION_EMIT_PROBABILITY))
+ set_param_value ("inline-address-taken-function-emit-probability", 100);
+
+ if (!PARAM_SET_P (PARAM_INLINE_ADDRESS_NOT_TAKEN_FUNCTION_EMIT_PROBABILITY))
+ set_param_value ("inline-address-not-taken-function-emit-probability", 100);
+
+ if (!PARAM_SET_P (PARAM_INLINE_PRIORITY_THRESHOLD))
+ set_param_value ("inline-priority-threshold", 0);
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
@@ -2911,14 +2973,19 @@ arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
/* Define how to find the value returned by a function. */
-rtx
-arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
+static rtx
+arm_function_value(const_tree type, const_tree func,
+ bool outgoing ATTRIBUTE_UNUSED)
{
enum machine_mode mode;
int unsignedp ATTRIBUTE_UNUSED;
rtx r ATTRIBUTE_UNUSED;
mode = TYPE_MODE (type);
+
+ if (TARGET_AAPCS_BASED)
+ return aapcs_allocate_return_reg (mode, type, func);
+
/* Promote integer types. */
if (INTEGRAL_TYPE_P (type))
PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
@@ -2935,7 +3002,87 @@ arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
}
}
- return LIBCALL_VALUE(mode);
+ return LIBCALL_VALUE (mode);
+}
+
+static int
+libcall_eq (const void *p1, const void *p2)
+{
+ return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
+}
+
+static hashval_t
+libcall_hash (const void *p1)
+{
+ return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
+}
+
+static void
+add_libcall (htab_t htab, rtx libcall)
+{
+ *htab_find_slot (htab, libcall, INSERT) = libcall;
+}
+
+static bool
+arm_libcall_uses_aapcs_base (rtx libcall)
+{
+ static bool init_done = false;
+ static htab_t libcall_htab;
+
+ if (!init_done)
+ {
+ init_done = true;
+
+ libcall_htab = htab_create (31, libcall_hash, libcall_eq,
+ NULL);
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, DImode));
+
+ /* add_libcall (libcall_htab,
+ convert_optab_libfunc (sext_optab, SFmode, HFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode)); */
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, SFmode));
+ }
+
+ return libcall && htab_find (libcall_htab, libcall) != NULL;
+}
+
+rtx
+arm_libcall_value (enum machine_mode mode, rtx libcall)
+{
+ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ /* The following libcalls return their result in integer registers,
+ even though they return a floating point value. */
+ if (arm_libcall_uses_aapcs_base (libcall))
+ return gen_rtx_REG (mode, ARG_REGISTER(1));
+ }
+
+ return LIBCALL_VALUE (mode);
}
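As a minimal illustration of why this table matters (a sketch, assuming an EABI hard-float build; the function name is hypothetical): the DImode-to-DFmode conversion registered above via sfloat_optab is the RTABI helper __aeabi_l2d, which is itself compiled with the base (soft-float) AAPCS.

/* Sketch: even under -mfloat-abi=hard this calls __aeabi_l2d, whose
   DFmode result is returned in the core register pair r0/r1 rather
   than in d0 -- exactly the case arm_libcall_value must report.  */
double
int64_to_double (long long x)
{
  return (double) x;
}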
/* Determine the amount of memory needed to store the possible return
@@ -2945,10 +3092,12 @@ arm_apply_result_size (void)
{
int size = 16;
- if (TARGET_ARM)
+ if (TARGET_32BIT)
{
if (TARGET_HARD_FLOAT_ABI)
{
+ if (TARGET_VFP)
+ size += 32;
if (TARGET_FPA)
size += 12;
if (TARGET_MAVERICK)
@@ -2961,27 +3110,56 @@ arm_apply_result_size (void)
return size;
}
-/* Decide whether a type should be returned in memory (true)
- or in a register (false). This is called as the target hook
- TARGET_RETURN_IN_MEMORY. */
+/* Decide whether TYPE should be returned in memory (true)
+ or in a register (false). FNTYPE is the type of the function making
+ the call. */
static bool
-arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+arm_return_in_memory (const_tree type, const_tree fntype)
{
HOST_WIDE_INT size;
- size = int_size_in_bytes (type);
+ size = int_size_in_bytes (type); /* Negative if not fixed size. */
+
+ if (TARGET_AAPCS_BASED)
+ {
+ /* Simple, non-aggregate types (i.e. not including vectors and
+ complex) are always returned in a register (or registers).
+ We don't care about which register here, so we can short-cut
+ some of the detail. */
+ if (!AGGREGATE_TYPE_P (type)
+ && TREE_CODE (type) != VECTOR_TYPE
+ && TREE_CODE (type) != COMPLEX_TYPE)
+ return false;
+
+ /* Any return value that is no larger than one word can be
+ returned in r0. */
+ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
+ return false;
+
+ /* Check any available co-processors to see if they accept the
+ type as a register candidate (VFP, for example, can return
+ some aggregates in consecutive registers). These aren't
+ available if the call is variadic. */
+ if (aapcs_select_return_coproc (type, fntype) >= 0)
+ return false;
+
+ /* Vector values should be returned using ARM registers, not
+ memory (unless they're over 16 bytes, which will break since
+ we only have four call-clobbered registers to play with). */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
+
+ /* The rest go in memory. */
+ return true;
+ }
- /* Vector values should be returned using ARM registers, not memory (unless
- they're over 16 bytes, which will break since we only have four
- call-clobbered registers to play with). */
if (TREE_CODE (type) == VECTOR_TYPE)
return (size < 0 || size > (4 * UNITS_PER_WORD));
if (!AGGREGATE_TYPE_P (type) &&
- !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
- /* All simple types are returned in registers.
- For AAPCS, complex types are treated the same as aggregates. */
- return 0;
+ (TREE_CODE (type) != VECTOR_TYPE))
+ /* All simple types are returned in registers. */
+ return false;
if (arm_abi != ARM_ABI_APCS)
{
@@ -2998,7 +3176,7 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
the aggregate is either huge or of variable size, and in either case
we will want to return it via memory and not in a register. */
if (size < 0 || size > UNITS_PER_WORD)
- return 1;
+ return true;
if (TREE_CODE (type) == RECORD_TYPE)
{
@@ -3018,18 +3196,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
continue;
if (field == NULL)
- return 0; /* An empty structure. Allowed by an extension to ANSI C. */
+ return false; /* An empty structure. Allowed by an extension to ANSI C. */
/* Check that the first field is valid for returning in a register. */
/* ... Floats are not allowed */
if (FLOAT_TYPE_P (TREE_TYPE (field)))
- return 1;
+ return true;
/* ... Aggregates that are not themselves valid for returning in
a register are not allowed. */
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
- return 1;
+ return true;
/* Now check the remaining fields, if any. Only bitfields are allowed,
since they are not addressable. */
@@ -3041,10 +3219,10 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
continue;
if (!DECL_BIT_FIELD_TYPE (field))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
if (TREE_CODE (type) == UNION_TYPE)
@@ -3061,18 +3239,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
continue;
if (FLOAT_TYPE_P (TREE_TYPE (field)))
- return 1;
+ return true;
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
#endif /* not ARM_WINCE */
/* Return all other types in memory. */
- return 1;
+ return true;
}
/* Indicate whether or not words of a double are in big-endian order. */
@@ -3097,14 +3275,752 @@ arm_float_words_big_endian (void)
return 1;
}
+const struct pcs_attribute_arg
+{
+ const char *arg;
+ enum arm_pcs value;
+} pcs_attribute_args[] =
+ {
+ {"aapcs", ARM_PCS_AAPCS},
+ {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
+#if 0
+ /* We could recognize these, but changes would be needed elsewhere
+ * to implement them. */
+ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
+ {"atpcs", ARM_PCS_ATPCS},
+ {"apcs", ARM_PCS_APCS},
+#endif
+ {NULL, ARM_PCS_UNKNOWN}
+ };
+
+static enum arm_pcs
+arm_pcs_from_attribute (tree attr)
+{
+ const struct pcs_attribute_arg *ptr;
+ const char *arg;
+
+ /* Get the value of the argument. */
+ if (TREE_VALUE (attr) == NULL_TREE
+ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
+ return ARM_PCS_UNKNOWN;
+
+ arg = TREE_STRING_POINTER (TREE_VALUE (attr));
+
+ /* Check it against the list of known arguments. */
+ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
+ if (streq (arg, ptr->arg))
+ return ptr->value;
+
+ /* An unrecognized PCS variant. */
+ return ARM_PCS_UNKNOWN;
+}
+
+/* Get the PCS variant to use for this call. TYPE is the function's type
+ specification, DECL is the specific declaration. DECL may be null if
+ the call could be indirect or if this is a library call. */
+static enum arm_pcs
+arm_get_pcs_model (const_tree type, const_tree decl)
+{
+ bool user_convention = false;
+ enum arm_pcs user_pcs = arm_pcs_default;
+ tree attr;
+
+ gcc_assert (type);
+
+ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
+ user_convention = true;
+ }
+
+ if (TARGET_AAPCS_BASED)
+ {
+ /* Detect varargs functions. These always use the base rules
+ (no argument is ever a candidate for a co-processor
+ register). */
+ bool base_rules = (TYPE_ARG_TYPES (type) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
+ != void_type_node));
+
+ if (user_convention)
+ {
+ if (user_pcs > ARM_PCS_AAPCS_LOCAL)
+ sorry ("Non-AAPCS derived PCS variant");
+ else if (base_rules && user_pcs != ARM_PCS_AAPCS)
+ error ("Variadic functions must use the base AAPCS variant");
+ }
+
+ if (base_rules)
+ return ARM_PCS_AAPCS;
+ else if (user_convention)
+ return user_pcs;
+ else if (decl && flag_unit_at_a_time)
+ {
+ /* Local functions never leak outside this compilation unit,
+ so we are free to use whatever conventions are
+ appropriate. */
+ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local)
+ return ARM_PCS_AAPCS_LOCAL;
+ }
+ }
+ else if (user_convention && user_pcs != arm_pcs_default)
+ sorry ("PCS variant");
+
+ /* For everything else we use the target's default. */
+ return arm_pcs_default;
+}
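A short sketch of these rules in use (illustrative declarations, assuming a VFP-capable AAPCS target):

/* Non-variadic: may opt into the VFP variant; a and b arrive in d0/d1. */
double dot2 (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));

/* Variadic: the base rules are forced, so the doubles travel in core
   registers; combining this declaration with pcs("aapcs-vfp") would
   draw the "base AAPCS variant" error above.  */
double average (int n, ...);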
+
+
+static void
+aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ rtx libcall ATTRIBUTE_UNUSED,
+ const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ /* Record the unallocated VFP registers. */
+ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
+ pcum->aapcs_vfp_reg_alloc = 0;
+}
+
+/* Walk down the type tree of TYPE counting consecutive base elements.
+ If *MODEP is VOIDmode, then set it to the first valid floating point
+ type. If a non-floating point type is found, or if a floating point
+ type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
+ otherwise return the count in the sub-tree. */
+static int
+aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
+{
+ enum machine_mode mode;
+ HOST_WIDE_INT size;
+
+ switch (TREE_CODE (type))
+ {
+ case REAL_TYPE:
+ mode = TYPE_MODE (type);
+ if (mode != DFmode && mode != SFmode)
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case COMPLEX_TYPE:
+ mode = TYPE_MODE (TREE_TYPE (type));
+ if (mode != DFmode && mode != SFmode)
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 2;
+
+ break;
+
+ case VECTOR_TYPE:
+ /* Use V2SImode and V4SImode as representatives of all 64-bit
+ and 128-bit vector types, whether or not those modes are
+ supported with the present options. */
+ size = int_size_in_bytes (type);
+ switch (size)
+ {
+ case 8:
+ mode = V2SImode;
+ break;
+ case 16:
+ mode = V4SImode;
+ break;
+ default:
+ return -1;
+ }
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ /* Vector modes are considered to be opaque: two vectors are
+ equivalent for the purposes of being homogeneous aggregates
+ if they are the same size. */
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case ARRAY_TYPE:
+ {
+ int count;
+ tree index = TYPE_DOMAIN (type);
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
+ if (count == -1
+ || !index
+ || !TYPE_MAX_VALUE (index)
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !TYPE_MIN_VALUE (index)
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || count < 0)
+ return -1;
+
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case RECORD_TYPE:
+ {
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count += sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ /* These aren't very interesting except in a degenerate case. */
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count = count > sub_count ? count : sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ default:
+ break;
+ }
+
+ return -1;
+}
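For illustration, how this walk classifies a few hypothetical types; note that the 1-to-4-element limit is enforced later by aapcs_vfp_is_call_or_return_candidate:

/* Sketch of homogeneous-aggregate classification:  */
struct vec3  { float x, y, z; };       /* 3 x SFmode  -> count 3, accepted */
struct quat  { float q[4]; };          /* 4 x SFmode  -> count 4, accepted */
struct mixed { float x; double y; };   /* SFmode vs DFmode -> -1, rejected */
struct pad   { float x; int n; };      /* non-FP field     -> -1, rejected */
struct big   { double d[5]; };         /* count 5: valid here, but > 4 is
                                          rejected by the caller           */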
+
+static bool
+aapcs_vfp_is_call_or_return_candidate (enum machine_mode mode, const_tree type,
+ int *base_mode,
+ int *count)
+{
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ *count = 1;
+ *base_mode = mode;
+ return true;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ {
+ *count = 2;
+ *base_mode = (mode == DCmode ? DFmode : SFmode);
+ return true;
+ }
+ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
+ {
+ enum machine_mode aggregate_mode = VOIDmode;
+ int ag_count = aapcs_vfp_sub_candidate (type, &aggregate_mode);
+
+ if (ag_count > 0 && ag_count <= 4)
+ {
+ *count = ag_count;
+ *base_mode = aggregate_mode;
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool
+aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type)
+{
+ int count ATTRIBUTE_UNUSED;
+ int ag_mode ATTRIBUTE_UNUSED;
+
+ if (!(pcs_variant == ARM_PCS_AAPCS_VFP
+ || (pcs_variant == ARM_PCS_AAPCS_LOCAL
+ && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
+ return false;
+ return aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
+}
+
+static bool
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type)
+{
+ if (!(pcum->pcs_variant == ARM_PCS_AAPCS_VFP
+ || (pcum->pcs_variant == ARM_PCS_AAPCS_LOCAL
+ && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
+ return false;
+ return aapcs_vfp_is_call_or_return_candidate (mode, type,
+ &pcum->aapcs_vfp_rmode,
+ &pcum->aapcs_vfp_rcount);
+}
+
+static bool
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
+ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
+ int regno;
+
+ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
+ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
+ {
+ pcum->aapcs_vfp_reg_alloc = mask << regno;
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int i;
+ int rcount = pcum->aapcs_vfp_rcount;
+ int rshift = shift;
+ enum machine_mode rmode = pcum->aapcs_vfp_rmode;
+ rtx par;
+ if (!TARGET_NEON)
+ {
+ /* Avoid using unsupported vector modes. */
+ if (rmode == V2SImode)
+ rmode = DImode;
+ else if (rmode == V4SImode)
+ {
+ rmode = DImode;
+ rcount *= 2;
+ rshift /= 2;
+ }
+ }
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
+ for (i = 0; i < rcount; i++)
+ {
+ rtx tmp = gen_rtx_REG (rmode,
+ FIRST_VFP_REGNUM + regno + i * rshift);
+ tmp = gen_rtx_EXPR_LIST
+ (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (rmode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
+
+ pcum->aapcs_reg = par;
+ }
+ else
+ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
+ return true;
+ }
+ return false;
+}
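A worked sketch of the bitmask scan above (each bit is one SFmode slot; a DFmode value needs two adjacent bits starting at an even slot; the prototype is illustrative):

/* void f (float a, double b, float c);
   a -> s0   mask 0b0001 fits at slot 0
   b -> d1   mask 0b0011 fails at slot 0 (s0 already taken), fits at
             slot 2, i.e. the pair s2/s3
   c -> s1   mask 0b0010 back-fills the hole left behind b            */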
+
+static rtx
+aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ if (!(pcs_variant == ARM_PCS_AAPCS_VFP
+ || (pcs_variant == ARM_PCS_AAPCS_LOCAL
+ && TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT)))
+ return NULL;
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int count;
+ int ag_mode;
+ int i;
+ rtx par;
+ int shift;
+
+ aapcs_vfp_is_call_or_return_candidate (mode, type, &ag_mode, &count);
+
+ if (!TARGET_NEON)
+ {
+ if (ag_mode == V2SImode)
+ ag_mode = DImode;
+ else if (ag_mode == V4SImode)
+ {
+ ag_mode = DImode;
+ count *= 2;
+ }
+ }
+ shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
+ for (i = 0; i < count; i++)
+ {
+ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
+
+ return par;
+ }
+
+ return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
+}
+
+static void
+aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
+ pcum->aapcs_vfp_reg_alloc = 0;
+ return;
+}
+
+#define AAPCS_CP(X) \
+ { \
+ aapcs_ ## X ## _cum_init, \
+ aapcs_ ## X ## _is_call_candidate, \
+ aapcs_ ## X ## _allocate, \
+ aapcs_ ## X ## _is_return_candidate, \
+ aapcs_ ## X ## _allocate_return_reg, \
+ aapcs_ ## X ## _advance \
+ }
+
+/* Table of co-processors that can be used to pass arguments in
+ registers. Ideally no argument should be a candidate for more than
+ one co-processor table entry, but the table is processed in order
+ and stops after the first match. If that entry then fails to put
+ the argument into a co-processor register, the argument will go on
+ the stack. */
+static struct
+{
+ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
+ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
+
+ /* Return true if an argument of mode MODE (or type TYPE if MODE is
+ BLKmode) is a candidate for this co-processor's registers; this
+ function should ignore any position-dependent state in
+ CUMULATIVE_ARGS and only use call-type dependent information. */
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+ /* Return true if the argument does get a co-processor register; it
+ should set aapcs_reg to an RTX of the register allocated as is
+ required for a return from FUNCTION_ARG. */
+ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+ /* Return true if a result of mode MODE (or type TYPE if MODE is
+ BLKmode) can be returned in this co-processor's registers. */
+ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Allocate and return an RTX element to hold the return type of a
+ call; this routine must not fail and will only be called if
+ is_return_candidate returned true with the same parameters. */
+ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Finish processing this argument and prepare to start processing
+ the next one. */
+ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
+ {
+ AAPCS_CP(vfp)
+ };
+
+#undef AAPCS_CP
+
+static int
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type)
+{
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
+ return i;
+
+ return -1;
+}
+
+static int
+aapcs_select_return_coproc (const_tree type, const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
+
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
+ TYPE_MODE (type),
+ type))
+ return i;
+ }
+ return -1;
+}
+
+static rtx
+aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
+ const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+ int unsignedp ATTRIBUTE_UNUSED;
+
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
+
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
+
+ /* Promote integer types. */
+ if (type && INTEGRAL_TYPE_P (type))
+ PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
+
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
+ type))
+ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
+ mode, type);
+ }
+
+ /* Promotes small structs returned in a register to full-word size
+ for big-endian AAPCS. */
+ if (type && arm_return_in_msb (type))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if (size % UNITS_PER_WORD != 0)
+ {
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+ }
+
+ return gen_rtx_REG (mode, R0_REGNUM);
+}
+
+rtx
+aapcs_libcall_value (enum machine_mode mode)
+{
+ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
+}
+
+/* Lay out a function argument using the AAPCS rules. The rule
+ numbers referred to here are those in the AAPCS. */
+static void
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, int named)
+{
+ int nregs, nregs2;
+ int ncrn;
+
+ /* We only need to do this once per argument. */
+ if (pcum->aapcs_arg_processed)
+ return;
+
+ pcum->aapcs_arg_processed = true;
+
+ /* Special case: if named is false then we are handling an incoming
+ anonymous argument which is on the stack. */
+ if (!named)
+ return;
+
+ /* Is this a potential co-processor register candidate? */
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int slot = aapcs_select_call_coproc (pcum, mode, type);
+ pcum->aapcs_cprc_slot = slot;
+
+ /* We don't have to apply any of the rules from part B of the
+ preparation phase, these are handled elsewhere in the
+ compiler. */
+
+ if (slot >= 0)
+ {
+ /* A Co-processor register candidate goes either in its own
+ class of registers or on the stack. */
+ if (!pcum->aapcs_cprc_failed[slot])
+ {
+ /* C1.cp - Try to allocate the argument to co-processor
+ registers. */
+ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
+ return;
+
+ /* C2.cp - Put the argument on the stack and note that we
+ can't assign any more candidates in this slot. We also
+ need to note that we have allocated stack space, so that
+ we won't later try to split a non-cprc candidate between
+ core registers and the stack. */
+ pcum->aapcs_cprc_failed[slot] = true;
+ pcum->can_split = false;
+ }
+
+ /* We didn't get a register, so this argument goes on the
+ stack. */
+ gcc_assert (pcum->can_split == false);
+ return;
+ }
+ }
+
+ /* C3 - For double-word aligned arguments, round the NCRN up to the
+ next even number. */
+ ncrn = pcum->aapcs_ncrn;
+ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
+ ncrn++;
+
+ nregs = ARM_NUM_REGS2(mode, type);
+
+ /* Sigh, this test should really assert that nregs > 0, but a GCC
+ extension allows empty structs and then gives them empty size; it
+ then allows such a structure to be passed by value. For some of
+ the code below we have to pretend that such an argument has
+ non-zero size so that we 'locate' it correctly either in
+ registers or on the stack. */
+ gcc_assert (nregs >= 0);
+
+ nregs2 = nregs ? nregs : 1;
+
+ /* C4 - Argument fits entirely in core registers. */
+ if (ncrn + nregs2 <= NUM_ARG_REGS)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = ncrn + nregs;
+ return;
+ }
+
+ /* C5 - Some core registers left and there are no arguments already
+ on the stack: split this argument between the remaining core
+ registers and the stack. */
+ if (ncrn < NUM_ARG_REGS && pcum->can_split)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
+ return;
+ }
+
+ /* C6 - NCRN is set to 4. */
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+
+ /* C7,C8 - argument goes on the stack. We have nothing to do here. */
+ return;
+}
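A concrete sketch of rules C4 and C5 above (illustrative types; NUM_ARG_REGS is 4):

/* struct s16 { int w[4]; };        occupies four words
   void g (struct s16 x);           C4: x fits entirely in r0-r3
   void h (int a, struct s16 x);    C5: a -> r0; x is split across
                                    r1-r3 plus one word of stack, so
                                    aapcs_partial = 12 bytes           */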
+
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is NULL. */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
- rtx libname ATTRIBUTE_UNUSED,
+ rtx libname,
tree fndecl ATTRIBUTE_UNUSED)
{
+ /* Long call handling. */
+ if (fntype)
+ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ else
+ pcum->pcs_variant = arm_pcs_default;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ if (arm_libcall_uses_aapcs_base (libname))
+ pcum->pcs_variant = ARM_PCS_AAPCS;
+
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_cprc_slot = -1;
+ pcum->can_split = true;
+
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ {
+ pcum->aapcs_cprc_failed[i] = false;
+ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
+ }
+ }
+ return;
+ }
+
+ /* Legacy ABIs */
+
/* On the ARM, the offset starts at 0. */
pcum->nregs = 0;
pcum->iwmmxt_nregs = 0;
@@ -3158,6 +4074,17 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
{
int nregs;
+ /* Handle the special case quickly. Pick an arbitrary value for op2 of
+ a call insn (op3 of a call_value insn). */
+ if (mode == VOIDmode)
+ return const0_rtx;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_reg;
+ }
+
/* Varargs vectors are treated the same as long long.
named_count avoids having to change the way arm handles 'named' */
if (TARGET_IWMMXT_ABI
@@ -3199,10 +4126,16 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
static int
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
- tree type, bool named ATTRIBUTE_UNUSED)
+ tree type, bool named)
{
int nregs = pcum->nregs;
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_partial;
+ }
+
if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
return 0;
@@ -3214,6 +4147,39 @@ arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
return 0;
}
+void
+arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, bool named)
+{
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+
+ if (pcum->aapcs_cprc_slot >= 0)
+ {
+ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
+ type);
+ pcum->aapcs_cprc_slot = -1;
+ }
+
+ /* Generic stuff. */
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ }
+ else
+ {
+ pcum->nargs += 1;
+ if (arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs
+ && TARGET_IWMMXT_ABI)
+ pcum->iwmmxt_nregs += 1;
+ else
+ pcum->nregs += ARM_NUM_REGS2 (mode, type);
+ }
+}
+
/* Variable sized types are passed by reference. This is a GCC
extension to the ARM ABI. */
@@ -3264,6 +4230,8 @@ const struct attribute_spec arm_attribute_table[] =
/* Whereas these functions are always known to reside within the 26 bit
addressing range. */
{ "short_call", 0, 0, false, true, true, NULL },
+ /* Specify the procedure call conventions for a function. */
+ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
/* Interrupt Service Routines have special prologue and epilogue requirements. */
{ "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
{ "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
@@ -3366,6 +4334,20 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
return NULL_TREE;
}
+/* Handle a "pcs" attribute; arguments as in struct
+ attribute_spec.handler. */
+static tree
+arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute. This attribute is another way of
requesting hidden visibility. ARM's compiler supports
@@ -3527,7 +4509,7 @@ arm_is_long_call_p (tree decl)
/* Return nonzero if it is ok to make a tail-call to DECL. */
static bool
-arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+arm_function_ok_for_sibcall (tree decl, tree exp)
{
unsigned long func_type;
@@ -3560,6 +4542,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
if (IS_INTERRUPT (func_type))
return false;
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Check that the return value locations are the same. For
+ example that we aren't returning a value from the sibling in
+ a VFP register but then need to transfer it to a core
+ register. */
+ rtx a, b;
+
+ a = arm_function_value (TREE_TYPE (exp), decl, false);
+ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+
/* Never tailcall if function may be called with a misaligned SP. */
if (IS_STACKALIGN (func_type))
return false;
@@ -9995,6 +10992,82 @@ note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
return result;
}
+/* Convert instructions to their cc-clobbering variant if possible, since
+ that allows us to use smaller encodings. */
+
+static void
+thumb2_reorg (void)
+{
+ basic_block bb;
+ regset_head live;
+
+ INIT_REG_SET (&live);
+
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+ df_analyze ();
+
+ FOR_EACH_BB (bb)
+ {
+ rtx insn;
+
+ COPY_REG_SET (&live, DF_LR_OUT (bb));
+ df_simulate_initialize_backwards (bb, &live);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (NONJUMP_INSN_P (insn)
+ && !REGNO_REG_SET_P (&live, CC_REGNUM))
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SET
+ && low_register_operand (XEXP (pat, 0), SImode)
+ && thumb_16bit_operator (XEXP (pat, 1), SImode)
+ && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
+ && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+ {
+ rtx dst = XEXP (pat, 0);
+ rtx src = XEXP (pat, 1);
+ rtx op0 = XEXP (src, 0);
+ rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
+ ? XEXP (src, 1) : NULL);
+
+ if (rtx_equal_p (dst, op0)
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec = gen_rtvec (2, pat, clobber);
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
+ }
+ /* We can also handle a commutative operation where the
+ second operand matches the destination. */
+ else if (op1 && rtx_equal_p (dst, op1))
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec;
+
+ src = copy_rtx (src);
+ XEXP (src, 0) = op1;
+ XEXP (src, 1) = op0;
+ pat = gen_rtx_SET (VOIDmode, dst, src);
+ vec = gen_rtvec (2, pat, clobber);
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+
+ if (NONDEBUG_INSN_P (insn))
+ df_simulate_one_insn_backwards (bb, insn, &live);
+ }
+ }
+
+ CLEAR_REG_SET (&live);
+}
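For illustration (a sketch of the transformation's effect, not part of the patch): once the SET gains the CC clobber, the 16-bit *thumb2_alusi3_short alternative can match.

/* int add (int a, int b) { return a + b; }
   before thumb2_reorg:  add.w  r0, r0, r1   (32-bit encoding)
   after  thumb2_reorg:  adds   r0, r0, r1   (16-bit encoding; safe
                         because df showed CC_REGNUM dead here)        */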
+
/* Gcc puts the pool in the wrong place for ARM, since we can only
load addresses a limited distance around the pc. We do some
special munging to move the constant pool values to the correct
@@ -10006,6 +11079,9 @@ arm_reorg (void)
HOST_WIDE_INT address = 0;
Mfix * fix;
+ if (TARGET_THUMB2)
+ thumb2_reorg ();
+
minipool_fix_head = minipool_fix_tail = NULL;
/* The first insn must always be a note, or the code below won't
@@ -18823,19 +19899,24 @@ arm_output_load_gr (rtx *operands)
that way. */
static void
-arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
enum machine_mode mode,
tree type,
int *pretend_size,
int second_time ATTRIBUTE_UNUSED)
{
- int nregs = cum->nregs;
- if (nregs & 1
- && ARM_DOUBLEWORD_ALIGN
- && arm_needs_doubleword_align (mode, type))
- nregs++;
+ int nregs;
cfun->machine->uses_anonymous_args = 1;
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ nregs = pcum->aapcs_ncrn;
+ if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
+ nregs++;
+ }
+ else
+ nregs = pcum->nregs;
+
if (nregs < NUM_ARG_REGS)
*pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
@@ -19219,9 +20300,10 @@ arm_vector_mode_supported_p (enum machine_mode mode)
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode))
return true;
- if ((mode == V2SImode)
- || (mode == V4HImode)
- || (mode == V8QImode))
+ if ((TARGET_NEON || TARGET_IWMMXT)
+ && ((mode == V2SImode)
+ || (mode == V4HImode)
+ || (mode == V8QImode)))
return true;
return false;
@@ -19967,9 +21049,9 @@ arm_can_simplify_got_access (int n_symbol, int n_access)
}
/* Detect if INSN loads a global address. If so returns the symbol and
- stores the register contains the offset of GOT entry into OFFSET_REG. */
+ sets the corresponding OFFSET_REG and OFFSET_INSN. */
rtx
-arm_loaded_global_var (rtx insn, rtx * offset_reg)
+arm_loaded_global_var (rtx insn, rtx * offset_reg, rtx * offset_insn)
{
rtx set = single_set (insn);
rtx pic_reg = cfun->machine->pic_reg;
@@ -20014,6 +21096,7 @@ arm_loaded_global_var (rtx insn, rtx * offset_reg)
return NULL_RTX;
*offset_reg = op1;
+ *offset_insn = def_insn;
return RTVEC_ELT (XVEC (src, 0), 0);
}
@@ -20026,9 +21109,9 @@ arm_loaded_global_var (rtx insn, rtx * offset_reg)
LOAD_INSN is the original insn which loads the address from GOT. */
void
arm_load_global_address (rtx symbol, rtx offset_reg,
- rtx address_reg, rtx load_insn)
+ rtx address_reg, rtx load_insn, rtx offset_insn)
{
- rtx offset, got_prel;
+ rtx offset, got_prel, new_insn;
rtx labelno = GEN_INT (pic_labelno++);
rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
rtx set = single_set (load_insn);
@@ -20044,22 +21127,22 @@ arm_load_global_address (rtx symbol, rtx offset_reg,
UNSPEC_GOT_PREL_SYM);
got_prel = gen_rtx_CONST (Pmode, got_prel);
if (TARGET_32BIT)
- emit_insn_before (gen_pic_load_addr_32bit (offset_reg, got_prel),
- load_insn);
+ new_insn = emit_insn_after (gen_pic_load_addr_32bit (offset_reg, got_prel),
+ offset_insn);
else
- emit_insn_before (gen_pic_load_addr_thumb1 (offset_reg, got_prel),
- load_insn);
+ new_insn = emit_insn_after (gen_pic_load_addr_thumb1 (offset_reg, got_prel),
+ offset_insn);
/* The second insn:
(SET offset_reg (PLUS offset_reg pc_rtx)) */
if (TARGET_ARM)
- emit_insn_before (gen_pic_add_dot_plus_eight (offset_reg, offset_reg,
- labelno),
- load_insn);
- else
- emit_insn_before (gen_pic_add_dot_plus_four (offset_reg, offset_reg,
+ emit_insn_after (gen_pic_add_dot_plus_eight (offset_reg, offset_reg,
labelno),
- load_insn);
+ new_insn);
+ else
+ emit_insn_after (gen_pic_add_dot_plus_four (offset_reg, offset_reg,
+ labelno),
+ new_insn);
/* The last insn to access the GOT entry:
(SET address_reg (MEM offset_reg))
diff --git a/gcc-4.4.3/gcc/config/arm/arm.h b/gcc-4.4.3/gcc/config/arm/arm.h
index a413dd323..1189914f6 100644
--- a/gcc-4.4.3/gcc/config/arm/arm.h
+++ b/gcc-4.4.3/gcc/config/arm/arm.h
@@ -880,6 +880,9 @@ extern int arm_structure_size_boundary;
/* The number of (integer) argument register available. */
#define NUM_ARG_REGS 4
+/* And similarly for the VFP. */
+#define NUM_VFP_ARG_REGS 16
+
/* Return the register number of the N'th (integer) argument. */
#define ARG_REGISTER(N) (N - 1)
@@ -1494,9 +1497,10 @@ do { \
/* Define how to find the value returned by a library function
assuming the value has mode MODE. */
-#define LIBCALL_VALUE(MODE) \
- (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \
- && GET_MODE_CLASS (MODE) == MODE_FLOAT \
+#define LIBCALL_VALUE(MODE) \
+ (TARGET_AAPCS_BASED ? aapcs_libcall_value (MODE) \
+ : (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \
+ && GET_MODE_CLASS (MODE) == MODE_FLOAT) \
? gen_rtx_REG (MODE, FIRST_FPA_REGNUM) \
: TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK \
&& GET_MODE_CLASS (MODE) == MODE_FLOAT \
@@ -1505,22 +1509,16 @@ do { \
? gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM) \
: gen_rtx_REG (MODE, ARG_REGISTER (1)))
-/* Define how to find the value returned by a function.
- VALTYPE is the data type of the value (as a tree).
- If the precise function being called is known, FUNC is its FUNCTION_DECL;
- otherwise, FUNC is 0. */
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- arm_function_value (VALTYPE, FUNC);
-
-/* 1 if N is a possible register number for a function value.
- On the ARM, only r0 and f0 can return results. */
-/* On a Cirrus chip, mvf0 can return results. */
-#define FUNCTION_VALUE_REGNO_P(REGNO) \
- ((REGNO) == ARG_REGISTER (1) \
- || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \
- && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \
- || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \
- || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \
+/* 1 if REGNO is a possible register number for a function value. */
+#define FUNCTION_VALUE_REGNO_P(REGNO) \
+ ((REGNO) == ARG_REGISTER (1) \
+ || (TARGET_AAPCS_BASED && TARGET_32BIT \
+ && TARGET_VFP && TARGET_HARD_FLOAT \
+ && (REGNO) == FIRST_VFP_REGNUM) \
+ || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \
+ && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \
+ || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \
+ || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \
&& TARGET_HARD_FLOAT_ABI && TARGET_FPA))
/* Amount of memory needed for an untyped call to save all possible return
@@ -1620,9 +1618,27 @@ machine_function;
that is in text_section. */
extern GTY(()) rtx thumb_call_via_label[14];
+/* The number of potential ways of assigning to a co-processor. */
+#define ARM_NUM_COPROC_SLOTS 1
+
+/* Enumeration of procedure calling standard variants. We don't really
+ support all of these yet. */
+enum arm_pcs
+{
+ ARM_PCS_AAPCS, /* Base standard AAPCS. */
+ ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */
+ ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */
+ /* This must be the last AAPCS variant. */
+ ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */
+ ARM_PCS_ATPCS, /* ATPCS. */
+ ARM_PCS_APCS, /* APCS (legacy Linux etc). */
+ ARM_PCS_UNKNOWN
+};
+
+/* We can't define this inside a generator file because it needs enum
+ machine_mode. */
/* A C type for declaring a variable that is used as the first argument of
- `FUNCTION_ARG' and other related values. For some target machines, the
- type `int' suffices and can hold the number of bytes of argument so far. */
+ `FUNCTION_ARG' and other related values. */
typedef struct
{
/* This is the number of registers of arguments scanned so far. */
@@ -1631,7 +1647,30 @@ typedef struct
int iwmmxt_nregs;
int named_count;
int nargs;
- int can_split;
+ /* Which procedure call variant to use for this call. */
+ enum arm_pcs pcs_variant;
+
+ /* AAPCS related state tracking. */
+ int aapcs_arg_processed; /* No need to lay out this argument again. */
+ int aapcs_cprc_slot; /* Index of co-processor rules to handle
+ this argument, or -1 if using core
+ registers. */
+ int aapcs_ncrn;
+ int aapcs_next_ncrn;
+ rtx aapcs_reg; /* Register assigned to this argument. */
+ int aapcs_partial; /* How many bytes are passed in regs (if
+ split between core regs and stack).
+ Zero otherwise. */
+ int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS];
+ int can_split; /* Argument can be split between core regs
+ and the stack. */
+ /* Private data for tracking VFP register allocation. */
+ unsigned aapcs_vfp_regs_free;
+ unsigned aapcs_vfp_reg_alloc;
+ int aapcs_vfp_rcount;
+ /* Can't include insn-modes.h because this header is needed before we
+ generate it. */
+ int /* enum machine_mode */ aapcs_vfp_rmode;
} CUMULATIVE_ARGS;
/* Define where to put the arguments to a function.
@@ -1677,13 +1716,7 @@ typedef struct
of mode MODE and data type TYPE.
(TYPE is null for libcalls where that information may not be available.) */
#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
- (CUM).nargs += 1; \
- if (arm_vector_mode_supported_p (MODE) \
- && (CUM).named_count > (CUM).nargs \
- && TARGET_IWMMXT_ABI) \
- (CUM).iwmmxt_nregs += 1; \
- else \
- (CUM).nregs += ARM_NUM_REGS2 (MODE, TYPE)
+ arm_function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED))
/* If defined, a C expression that gives the alignment boundary, in bits, of an
argument with the specified mode and type. If it is not defined,
@@ -1695,9 +1728,11 @@ typedef struct
/* 1 if N is a possible register number for function argument passing.
On the ARM, r0-r3 are used to pass args. */
-#define FUNCTION_ARG_REGNO_P(REGNO) \
- (IN_RANGE ((REGNO), 0, 3) \
- || (TARGET_IWMMXT_ABI \
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ (IN_RANGE ((REGNO), 0, 3) \
+ || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \
+ && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \
+ || (TARGET_IWMMXT_ABI \
&& IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
diff --git a/gcc-4.4.3/gcc/config/arm/arm.md b/gcc-4.4.3/gcc/config/arm/arm.md
index e9de45efd..ea16f1c31 100644
--- a/gcc-4.4.3/gcc/config/arm/arm.md
+++ b/gcc-4.4.3/gcc/config/arm/arm.md
@@ -743,14 +743,14 @@
[(set (reg:CC CC_REGNUM)
(compare:CC
(match_operand:SI 1 "s_register_operand" "r,r")
- (match_operand:SI 2 "arm_addimm_operand" "I,L")))
+ (match_operand:SI 2 "arm_addimm_operand" "L,I")))
(set (match_operand:SI 0 "s_register_operand" "=r,r")
(plus:SI (match_dup 1)
- (match_operand:SI 3 "arm_addimm_operand" "L,I")))]
+ (match_operand:SI 3 "arm_addimm_operand" "I,L")))]
"TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])"
"@
- sub%.\\t%0, %1, %2
- add%.\\t%0, %1, #%n2"
+ add%.\\t%0, %1, %3
+ sub%.\\t%0, %1, #%n3"
[(set_attr "conds" "set")]
)
@@ -1907,9 +1907,17 @@
{
if (GET_CODE (operands[2]) == CONST_INT)
{
- arm_split_constant (AND, SImode, NULL_RTX,
- INTVAL (operands[2]), operands[0],
- operands[1], optimize && can_create_pseudo_p ());
+ if (INTVAL (operands[2]) == 255 && arm_arch6)
+ {
+ operands[1] = convert_to_mode (QImode, operands[1], 1);
+ emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0],
+ operands[1]));
+ }
+ else
+ arm_split_constant (AND, SImode, NULL_RTX,
+ INTVAL (operands[2]), operands[0],
+ operands[1],
+ optimize && can_create_pseudo_p ());
DONE;
}
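The effect of this change, sketched (assuming Thumb-2 on ARMv6 or later; the function name is illustrative): masking with 255 is now emitted through the renamed zero-extend pattern instead of being split as an AND constant.

/* unsigned low_byte (unsigned x) { return x & 255; }
   emits a single  uxtb r0, r0  via thumb2_zero_extendqisi2_v6.  */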
diff --git a/gcc-4.4.3/gcc/config/arm/bpabi.h b/gcc-4.4.3/gcc/config/arm/bpabi.h
index 4d2974e32..4b37dcf63 100644
--- a/gcc-4.4.3/gcc/config/arm/bpabi.h
+++ b/gcc-4.4.3/gcc/config/arm/bpabi.h
@@ -90,16 +90,22 @@
#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (muldi3, lmul)
#endif
#ifdef L_fixdfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixdfdi, d2lz) \
+ extern DWtype __fixdfdi (DFtype) __attribute__((pcs("aapcs"))); \
+ extern UDWtype __fixunsdfdi (DFtype) __asm__("__aeabi_d2ulz") __attribute__((pcs("aapcs")));
#endif
#ifdef L_fixunsdfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunsdfdi, d2ulz) \
+ extern UDWtype __fixunsdfdi (DFtype) __attribute__((pcs("aapcs")));
#endif
#ifdef L_fixsfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixsfdi, f2lz) \
+ extern DWtype __fixsfdi (SFtype) __attribute__((pcs("aapcs"))); \
+ extern UDWtype __fixunssfdi (SFtype) __asm__("__aeabi_f2ulz") __attribute__((pcs("aapcs")));
#endif
#ifdef L_fixunssfdi
-#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz)
+#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (fixunssfdi, f2ulz) \
+ extern UDWtype __fixunssfdi (SFtype) __attribute__((pcs("aapcs")));
#endif
#ifdef L_floatdidf
#define DECLARE_LIBRARY_RENAMES RENAME_LIBRARY (floatdidf, l2d)
diff --git a/gcc-4.4.3/gcc/config/arm/constraints.md b/gcc-4.4.3/gcc/config/arm/constraints.md
index d14dafd2b..8cab39a66 100644
--- a/gcc-4.4.3/gcc/config/arm/constraints.md
+++ b/gcc-4.4.3/gcc/config/arm/constraints.md
@@ -31,6 +31,7 @@
;; The following multi-letter normal constraints have been used:
;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv
;; in Thumb-1 state: Pa, Pb
+;; in Thumb-2 state: Ps, Pt, Pw, Px
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Us
@@ -149,6 +150,26 @@
(match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262
&& (ival > 255 || ival < -255)")))
+(define_constraint "Ps"
+ "@internal In Thumb-2 state a constant in the range -255 to +255"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 255")))
+
+(define_constraint "Pt"
+ "@internal In Thumb-2 state a constant in the range -7 to +7"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -7 && ival <= 7")))
+
+(define_constraint "Pw"
+ "@internal In Thumb-2 state a constant in the range -255 to -1"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1")))
+
+(define_constraint "Px"
+ "@internal In Thumb-2 state a constant in the range -7 to -1"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+
(define_constraint "G"
"In ARM/Thumb-2 state a valid FPA immediate constant."
(and (match_code "const_double")
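
The numeric tests behind the four new constraints, restated as a self-contained C sketch (function names are ours; in GCC proper, genpreds emits satisfies_constraint_Ps and friends from these definitions, and the Thumb-2 peepholes added later in this patch call them):

#include <stdbool.h>

static bool in_range_Ps (long ival) { return ival >= -255 && ival <= 255; }
static bool in_range_Pt (long ival) { return ival >= -7   && ival <= 7;   }
static bool in_range_Pw (long ival) { return ival >= -255 && ival <= -1;  }
static bool in_range_Px (long ival) { return ival >= -7   && ival <= -1;  }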
diff --git a/gcc-4.4.3/gcc/config/arm/linux-eabi.h b/gcc-4.4.3/gcc/config/arm/linux-eabi.h
index 6dd44436a..2ca881890 100644
--- a/gcc-4.4.3/gcc/config/arm/linux-eabi.h
+++ b/gcc-4.4.3/gcc/config/arm/linux-eabi.h
@@ -27,6 +27,7 @@
{ \
TARGET_BPABI_CPP_BUILTINS(); \
LINUX_TARGET_OS_CPP_BUILTINS(); \
+ ANDROID_TARGET_OS_CPP_BUILTINS(); \
} \
while (false)
diff --git a/gcc-4.4.3/gcc/config/arm/t-arm-elf b/gcc-4.4.3/gcc/config/arm/t-arm-elf
index 9f6068258..5b812b750 100644
--- a/gcc-4.4.3/gcc/config/arm/t-arm-elf
+++ b/gcc-4.4.3/gcc/config/arm/t-arm-elf
@@ -32,6 +32,13 @@ MULTILIB_OPTIONS += $(MLOPT_armv7a)
MULTILIB_DIRNAMES += $(MLDIR_armv7a)
MULTILIB_MATCHES += $(MLMATCH_armv7a)
+# Not quite true. We can support hard-vfp calling in Thumb2, but how do we
+# express that here? Also, we really need architecture v5e or later
+# (mcrr etc).
+MULTILIB_OPTIONS += mfloat-abi=hard
+MULTILIB_DIRNAMES += fpu
+MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard*
+
# MULTILIB_OPTIONS += mcpu=ep9312
# MULTILIB_DIRNAMES += ep9312
# MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312*
diff --git a/gcc-4.4.3/gcc/config/arm/thumb2.md b/gcc-4.4.3/gcc/config/arm/thumb2.md
index 2243172e6..6e03e8b21 100644
--- a/gcc-4.4.3/gcc/config/arm/thumb2.md
+++ b/gcc-4.4.3/gcc/config/arm/thumb2.md
@@ -943,7 +943,7 @@
(set_attr "neg_pool_range" "*,250")]
)
-(define_insn "*thumb2_zero_extendqisi2_v6"
+(define_insn "thumb2_zero_extendqisi2_v6"
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
"TARGET_THUMB2 && arm_arch6"
@@ -1006,29 +1006,6 @@
}"
)
-;; Peepholes and insns for 16-bit flag clobbering instructions.
-;; The conditional forms of these instructions do not clobber CC.
-;; However by the time peepholes are run it is probably too late to do
-;; anything useful with this information.
-(define_peephole2
- [(set (match_operand:SI 0 "low_register_operand" "")
- (match_operator:SI 3 "thumb_16bit_operator"
- [(match_operand:SI 1 "low_register_operand" "")
- (match_operand:SI 2 "low_register_operand" "")]))]
- "TARGET_THUMB2
- && (rtx_equal_p(operands[0], operands[1])
- || GET_CODE(operands[3]) == PLUS
- || GET_CODE(operands[3]) == MINUS)
- && peep2_regno_dead_p(0, CC_REGNUM)"
- [(parallel
- [(set (match_dup 0)
- (match_op_dup 3
- [(match_dup 1)
- (match_dup 2)]))
- (clobber (reg:CC CC_REGNUM))])]
- ""
-)
-
(define_insn "*thumb2_alusi3_short"
[(set (match_operand:SI 0 "s_register_operand" "=l")
(match_operator:SI 3 "thumb_16bit_operator"
@@ -1124,9 +1101,9 @@
)
(define_insn "*thumb2_addsi_short"
- [(set (match_operand:SI 0 "low_register_operand" "=l")
- (plus:SI (match_operand:SI 1 "low_register_operand" "l")
- (match_operand:SI 2 "low_reg_or_int_operand" "lIL")))
+ [(set (match_operand:SI 0 "low_register_operand" "=l,l")
+ (plus:SI (match_operand:SI 1 "low_register_operand" "l,0")
+ (match_operand:SI 2 "low_reg_or_int_operand" "lPt,Ps")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_THUMB2 && reload_completed"
"*
@@ -1178,6 +1155,32 @@
(set_attr "length" "2")]
)
+(define_peephole2
+ [(set (match_operand:CC 0 "cc_register" "")
+ (compare:CC (match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_THUMB2
+ && peep2_reg_dead_p (1, operands[1])
+ && satisfies_constraint_Pw (operands[2])"
+ [(parallel
+ [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])]
+ "operands[3] = GEN_INT (- INTVAL (operands[2]));"
+)
+
+(define_peephole2
+ [(match_scratch:SI 3 "l")
+ (set (match_operand:CC 0 "cc_register" "")
+ (compare:CC (match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_THUMB2
+ && satisfies_constraint_Px (operands[2])"
+ [(parallel
+ [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])]
+ "operands[4] = GEN_INT (- INTVAL (operands[2]));"
+)
+
(define_insn "*thumb2_cbz"
[(set (pc) (if_then_else
(eq (match_operand:SI 0 "s_register_operand" "l,?r")
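
What the two new peepholes buy, roughly: comparing x against a small negative constant k computes x - k = x + |k|, so the compare can be re-expressed as a flag-setting 16-bit ADDS of the negated constant, written back to the input register when it is dead (first peephole) or to a scratch otherwise (second peephole). A hedged C sketch of the source shape (function name illustrative):

int
is_minus_five (int x)
{
  /* cmp r0, #-5 and adds r0, r0, #5 set identical flags, and the
     ADDS form has a 16-bit Thumb-2 encoding; the first peephole
     applies when r0 is dead after the compare, as it is here.  */
  return x == -5;
}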
diff --git a/gcc-4.4.3/gcc/config/i386/atom.md b/gcc-4.4.3/gcc/config/i386/atom.md
new file mode 100644
index 000000000..1664269ba
--- /dev/null
+++ b/gcc-4.4.3/gcc/config/i386/atom.md
@@ -0,0 +1,796 @@
+;; Atom Scheduling
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; Atom is an in-order core with two integer pipelines.
+
+
+(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
+ (const_string "other"))
+
+(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
+ (const_string "other"))
+
+(define_automaton "atom")
+
+;; Atom has two ports: port 0 and port 1 connecting to all execution units
+(define_cpu_unit "atom-port-0,atom-port-1" "atom")
+
+;; EU: Execution Unit
+;; Atom EUs are connected by port 0 or port 1.
+
+(define_cpu_unit "atom-eu-0, atom-eu-1,
+ atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
+ "atom")
+
+;; Some EUs have duplicate copies and can be accessed via either
+;; port 0 or port 1
+;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
+
+;;; Some instructions are dual-pipe execution and need both ports
+;;; Complex multi-op macro-instructions need both ports and all EUs
+(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
+(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
+ atom-imul-1 + atom-imul-2 + atom-imul-3 +
+ atom-imul-4)")
+
+;;; Most simple instructions have 1 cycle latency. Some of them
+;;; issue in port 0, some in port 1 and some in either port.
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
+(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
+(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
+
+;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle throughput
+(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
+
+;;; fmul insns can have 4 or 5 cycle latency
+(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
+(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
+
+;;; fadd can have 5 cycle latency depending on instruction form
+(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
+
+;;; imul insns have 5 cycle latency
+(define_reservation "atom-imul-32"
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
+ atom-port-0")
+;;; imul instruction excludes other non-FP instructions.
+(exclusion_set "atom-eu-0, atom-eu-1"
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
+
+;;; dual-execution instructions can have 1, 2, 4 or 5 cycle latency
+;;; depending on instruction form
+(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
+(define_reservation "atom-dual-2c"
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
+(define_reservation "atom-dual-5c"
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
+
+;;; Complex macro-instructions have varying latency and use both ports.
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
+
+(define_insn_reservation "atom_other" 9
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "!jeu")))
+ "atom-complex, atom-all-eu*8")
+
+;; return has type "other" with atom_unit "jeu"
+(define_insn_reservation "atom_other_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "jeu")))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_multi" 9
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "multi"))
+ "atom-complex, atom-all-eu*8")
+
+;; Normal alu insns without carry
+(define_insn_reservation "atom_alu" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "0"))))
+ "atom-simple-either")
+
+;; Normal alu insns without carry
+(define_insn_reservation "atom_alu_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "0"))))
+ "atom-simple-either")
+
+;; Alu insn consuming CF, such as add/sbb
+(define_insn_reservation "atom_alu_carry" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "1"))))
+ "atom-simple-either")
+
+;; Alu insn consuming CF, such as add/sbb
+(define_insn_reservation "atom_alu_carry_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "1"))))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_alu1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_alu1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_negnot" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_negnot_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_imov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_imov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; 16<-16, 32<-32
+(define_insn_reservation "atom_imovx" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "atom-simple-either")
+
+;; 16<-16, 32<-32, mem
+(define_insn_reservation "atom_imovx_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "atom-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
+(define_insn_reservation "atom_imovx_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "atom-simple-0")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
+(define_insn_reservation "atom_imovx_2_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "atom-simple-0")
+
+;; 16<-8
+(define_insn_reservation "atom_imovx_3" 3
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (match_operand:HI 0 "register_operand")
+ (match_operand:QI 1 "general_operand"))))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_lea" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "!HI")))
+ "atom-simple-either")
+
+;; lea with a 16-bit address is a complex insn
+(define_insn_reservation "atom_lea_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "HI")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_incdec" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_incdec_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; simple shift instructions use the SHIFT EU, no memory operand
+(define_insn_reservation "atom_ishift" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
+ "atom-simple-0")
+
+;; simple shift instructions use the SHIFT EU, with a memory operand
+(define_insn_reservation "atom_ishift_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
+ "atom-simple-0")
+
+;; DF shift (prefixed with 0f) is a complex insn with 7 cycle latency
+(define_insn_reservation "atom_ishift_3" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (eq_attr "prefix_0f" "1")))
+ "atom-complex, atom-all-eu*6")
+
+(define_insn_reservation "atom_ishift1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_ishift1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_imul" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
+ "atom-imul-32")
+
+(define_insn_reservation "atom_imul_mem" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
+ "atom-imul-32")
+
+;; latency set to 10 for the common 64x64 imul
+(define_insn_reservation "atom_imul_3" 10
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "!SI")))
+ "atom-complex, atom-all-eu*9")
+
+(define_insn_reservation "atom_idiv" 65
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "idiv"))
+ "atom-complex, atom-all-eu*32, nothing*32")
+
+(define_insn_reservation "atom_icmp" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_icmp_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_test" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_test_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_ibr" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "!load")))
+ "atom-simple-1")
+
+;; complex if the jump target is loaded from memory
+(define_insn_reservation "atom_ibr_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "load")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_setcc" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "!store")))
+ "atom-simple-either")
+
+;; 2 cycles complex if target is in memory
+(define_insn_reservation "atom_setcc_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "store")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_icmov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_icmov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "atom_push" 2
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "push"))
+ "atom-dual-2c")
+
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
+(define_insn_reservation "atom_pop" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "DI")))
+ "atom-dual-1c")
+
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
+(define_insn_reservation "atom_pop_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "!DI")))
+ "atom-dual-2c")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "atom_call" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "call"))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_callv" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "callv"))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_leave" 3
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "leave"))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_str" 3
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "str"))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_sselog" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_sselog_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_sselog1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_sselog1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+;; not pmad, not psad
+(define_insn_reservation "atom_sseiadd" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "!simul")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-simple-either")
+
+;; pmad, psad on 64-bit operands
+(define_insn_reservation "atom_sseiadd_2" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "DI")))))
+ "atom-fmul-4c")
+
+;; pmad, psad on 128-bit operands
+(define_insn_reservation "atom_sseiadd_3" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "TI")))))
+ "atom-fmul-5c")
+
+;; if paddq(64 bit op), phadd/phsub
+(define_insn_reservation "atom_sseiadd_4" 6
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (ior (match_operand:V2DI 0 "register_operand")
+ (eq_attr "atom_unit" "complex"))))
+ "atom-complex, atom-all-eu*5")
+
+;; if immediate op.
+(define_insn_reservation "atom_sseishft" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "!sishuf")
+ (match_operand 2 "immediate_operand"))))
+ "atom-simple-either")
+
+;; if palignr or psrldq
+(define_insn_reservation "atom_sseishft_2" 1
+ (and (eq_attr "cpu" "atom")
+ (ior (eq_attr "type" "sseishft1")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "sishuf")
+ (match_operand 2 "immediate_operand")))))
+ "atom-simple-0")
+
+;; if reg/mem op
+(define_insn_reservation "atom_sseishft_3" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (not (match_operand 2 "immediate_operand"))))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_sseimul" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "sseimul"))
+ "atom-simple-0")
+
+;; rcpss or rsqrtss
+(define_insn_reservation "atom_sse" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
+ "atom-fmul-4c")
+
+;; movshdup, movsldup. Consider retyping these as sseishft.
+(define_insn_reservation "atom_sse_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "movdup")))
+ "atom-simple-0")
+
+;; lfence
+(define_insn_reservation "atom_sse_3" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "lfence")))
+ "atom-simple-either")
+
+;; sfence, clflush, mfence, prefetch
+(define_insn_reservation "atom_sse_4" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (ior (eq_attr "atom_sse_attr" "fence")
+ (eq_attr "atom_sse_attr" "prefetch"))))
+ "atom-simple-0")
+
+;; rcpps, rsqrtps, sqrt, ldmxcsr
+(define_insn_reservation "atom_sse_5" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
+ (eq_attr "atom_sse_attr" "mxcsr"))
+ (and (eq_attr "atom_sse_attr" "rcp")
+ (eq_attr "mode" "V4SF")))))
+ "atom-complex, atom-all-eu*6")
+
+;; xmm->xmm
+(define_insn_reservation "atom_ssemov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
+ "atom-simple-either")
+
+;; reg->xmm
+(define_insn_reservation "atom_ssemov_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
+ "atom-simple-0")
+
+;; xmm->reg
+(define_insn_reservation "atom_ssemov_3" 3
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
+ "atom-eu-0-3-1")
+
+;; mov mem
+(define_insn_reservation "atom_ssemov_4" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
+ "atom-simple-0")
+
+;; movu mem
+(define_insn_reservation "atom_ssemov_5" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
+ "atom-complex, atom-all-eu")
+
+;; no memory simple
+(define_insn_reservation "atom_sseadd" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-fadd-5c")
+
+;; memory simple
+(define_insn_reservation "atom_sseadd_mem" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-dual-5c")
+
+;; maxps, minps, *pd, hadd, hsub
+(define_insn_reservation "atom_sseadd_3" 8
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
+ "atom-complex, atom-all-eu*7")
+
+;; Except dppd/dpps
+(define_insn_reservation "atom_ssemul" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "!SF")))
+ "atom-fmul-5c")
+
+;; Except dppd/dpps, 4 cycle if mulss
+(define_insn_reservation "atom_ssemul_2" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF")))
+ "atom-fmul-4c")
+
+(define_insn_reservation "atom_ssecmp" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssecmp"))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_ssecomi" 10
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssecomi"))
+ "atom-complex, atom-all-eu*9")
+
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "atom_ssecvt" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand")))))
+ "atom-fadd-5c")
+
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "atom_ssecvt_2" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "memory_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand")))))
+ "atom-dual-5c")
+
+;; otherwise. 7 cycles average for cvtss2sd
+(define_insn_reservation "atom_ssecvt_3" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
+ "atom-complex, atom-all-eu*6")
+
+;; memory and cvtsi2sd
+(define_insn_reservation "atom_sseicvt" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseicvt")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand"))))
+ "atom-dual-5c")
+
+;; otherwise. 8 cycles average for cvtsd2si
+(define_insn_reservation "atom_sseicvt_2" 8
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseicvt")
+ (not (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand")))))
+ "atom-complex, atom-all-eu*7")
+
+(define_insn_reservation "atom_ssediv" 62
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssediv"))
+ "atom-complex, atom-all-eu*12, nothing*49")
+
+;; simple for fmov
+(define_insn_reservation "atom_fmov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+;; simple for fmov
+(define_insn_reservation "atom_fmov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; Define bypass here
+
+;; There will be no stall from lea to non-mem EX insns
+(define_bypass 0 "atom_lea"
+ "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec, atom_setcc, atom_icmov, atom_pop")
+
+(define_bypass 0 "atom_lea"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "!ix86_agi_dependent")
+
+;; There will be a 3 cycle stall from EX insns to the AGEN insn LEA
+(define_bypass 4 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_lea")
+
+;; There will be a 3 cycle stall from EX insns to insns that need address calculation
+(define_bypass 4 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imul_mem, atom_icmp_mem,
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_agi_dependent")
+
+;; Stall from imul to lea is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
+
+;; Stall from imul to memory address is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+ atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
+ atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
+ "ix86_agi_dependent")
+
+;; There will be no stall from cmp/test to jcc
+
+;; There will be a 1 cycle stall from flag producers to cmov and adc/sbb
+(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
+ atom_alu1, atom_negnot, atom_incdec, atom_ishift,
+ atom_ishift1, atom_rotate, atom_rotate1"
+ "atom_icmov, atom_alu_carry")
+
+;; lea to shift count stall is 2 cycles
+(define_bypass 3 "atom_lea"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_dep_by_shift_count")
+
+;; lea to shift source stall is 1 cycle
+(define_bypass 2 "atom_lea"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
+ "!ix86_dep_by_shift_count")
+
+;; non-lea to shift count stall is 1 cycle
+(define_bypass 2 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_dep_by_shift_count")
diff --git a/gcc-4.4.3/gcc/config/i386/cpuid.h b/gcc-4.4.3/gcc/config/i386/cpuid.h
index b5258652e..003c202f0 100644
--- a/gcc-4.4.3/gcc/config/i386/cpuid.h
+++ b/gcc-4.4.3/gcc/config/i386/cpuid.h
@@ -29,6 +29,7 @@
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
+#define bit_MOVBE (1 << 22)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
@@ -46,6 +47,7 @@
/* Extended Features */
/* %ecx */
#define bit_LAHF_LM (1 << 0)
+#define bit_LWP (1 << 15)
#define bit_SSE4a (1 << 6)
#define bit_SSE5 (1 << 11)
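
A minimal sketch of how the new MOVBE bit is consumed, using the __get_cpuid helper that this same header provides (function name is ours):

#include "cpuid.h"

static int
cpu_has_movbe (void)
{
  unsigned int eax, ebx, ecx, edx;
  /* CPUID leaf 1 reports MOVBE in %ecx bit 22.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;
  return (ecx & bit_MOVBE) != 0;
}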
diff --git a/gcc-4.4.3/gcc/config/i386/cygming.h b/gcc-4.4.3/gcc/config/i386/cygming.h
index 4f508a776..39650b107 100644
--- a/gcc-4.4.3/gcc/config/i386/cygming.h
+++ b/gcc-4.4.3/gcc/config/i386/cygming.h
@@ -34,7 +34,7 @@ along with GCC; see the file COPYING3. If not see
#endif
#undef TARGET_64BIT_MS_ABI
-#define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+#define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
#undef DEFAULT_ABI
#define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI)
@@ -202,7 +202,7 @@ do { \
#define CHECK_STACK_LIMIT 4000
#undef STACK_BOUNDARY
-#define STACK_BOUNDARY (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
+#define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
/* By default, target has a 80387, uses IEEE compatible arithmetic,
returns float values in the 387 and needs stack probes.
diff --git a/gcc-4.4.3/gcc/config/i386/driver-i386.c b/gcc-4.4.3/gcc/config/i386/driver-i386.c
index 9aa33d27c..c506b2bc3 100644
--- a/gcc-4.4.3/gcc/config/i386/driver-i386.c
+++ b/gcc-4.4.3/gcc/config/i386/driver-i386.c
@@ -378,6 +378,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Extended features */
unsigned int has_lahf_lm = 0, has_sse4a = 0;
unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
+ unsigned int has_movbe = 0;
unsigned int has_sse4_1 = 0, has_sse4_2 = 0;
unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
unsigned int has_pclmul = 0;
@@ -398,9 +399,22 @@ const char *host_detect_local_cpu (int argc, const char **argv)
__cpuid (1, eax, ebx, ecx, edx);
- /* We don't care for extended family. */
model = (eax >> 4) & 0x0f;
family = (eax >> 8) & 0x0f;
+ if (vendor == SIG_INTEL)
+ {
+ unsigned int extended_model, extended_family;
+
+ extended_model = (eax >> 12) & 0xf0;
+ extended_family = (eax >> 20) & 0xff;
+ if (family == 0x0f)
+ {
+ family += extended_family;
+ model += extended_model;
+ }
+ else if (family == 0x06)
+ model += extended_model;
+ }
has_sse3 = ecx & bit_SSE3;
has_ssse3 = ecx & bit_SSSE3;
@@ -408,6 +422,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_sse4_2 = ecx & bit_SSE4_2;
has_avx = ecx & bit_AVX;
has_cmpxchg16b = ecx & bit_CMPXCHG16B;
+ has_movbe = ecx & bit_MOVBE;
has_popcnt = ecx & bit_POPCNT;
has_aes = ecx & bit_AES;
has_pclmul = ecx & bit_PCLMUL;
@@ -505,8 +520,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
break;
case PROCESSOR_PENTIUMPRO:
if (has_longmode)
- /* It is Core 2 Duo. */
- cpu = "core2";
+ /* It is Core 2 or Atom. */
+ cpu = (model == 28 || model == 38) ? "atom" : "core2";
else if (arch)
{
if (has_sse3)
@@ -597,6 +612,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
options = concat (options, "-mcx16 ", NULL);
if (has_lahf_lm)
options = concat (options, "-msahf ", NULL);
+ if (has_movbe)
+ options = concat (options, "-mmovbe", NULL);
if (has_aes)
options = concat (options, "-maes ", NULL);
if (has_pclmul)
diff --git a/gcc-4.4.3/gcc/config/i386/i386-c.c b/gcc-4.4.3/gcc/config/i386/i386-c.c
index 3d17c104e..1bb394542 100644
--- a/gcc-4.4.3/gcc/config/i386/i386-c.c
+++ b/gcc-4.4.3/gcc/config/i386/i386-c.c
@@ -119,6 +119,10 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__core2");
def_or_undef (parse_in, "__core2__");
break;
+ case PROCESSOR_ATOM:
+ def_or_undef (parse_in, "__atom");
+ def_or_undef (parse_in, "__atom__");
+ break;
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
break;
@@ -187,6 +191,9 @@ ix86_target_macros_internal (int isa_flag,
case PROCESSOR_CORE2:
def_or_undef (parse_in, "__tune_core2__");
break;
+ case PROCESSOR_ATOM:
+ def_or_undef (parse_in, "__tune_atom__");
+ break;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
break;
@@ -225,6 +232,8 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_SSE5)
def_or_undef (parse_in, "__SSE5__");
+ if (isa_flag & OPTION_MASK_ISA_LWP)
+ def_or_undef (parse_in, "__LWP__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
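
The new predefines in use, mirroring how the existing __core2__/__tune_core2__ pairs are consumed (a sketch, not from the patch):

#ifdef __atom__
/* Compiled with -march=atom.  */
#endif
#ifdef __tune_atom__
/* Compiled with -mtune=atom.  */
#endif
#ifdef __LWP__
/* Compiled with -mlwp.  */
#endif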
diff --git a/gcc-4.4.3/gcc/config/i386/i386-protos.h b/gcc-4.4.3/gcc/config/i386/i386-protos.h
index 4b0e2c01c..96e01ea49 100644
--- a/gcc-4.4.3/gcc/config/i386/i386-protos.h
+++ b/gcc-4.4.3/gcc/config/i386/i386-protos.h
@@ -86,6 +86,9 @@ extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
extern void ix86_expand_binary_operator (enum rtx_code,
enum machine_mode, rtx[]);
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
+extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
+extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
@@ -140,9 +143,8 @@ extern int ix86_function_arg_boundary (enum machine_mode, tree);
extern bool ix86_sol10_return_in_memory (const_tree,const_tree);
extern rtx ix86_force_to_memory (enum machine_mode, rtx);
extern void ix86_free_from_memory (enum machine_mode);
-extern int ix86_cfun_abi (void);
-extern int ix86_function_abi (const_tree);
-extern int ix86_function_type_abi (const_tree);
+extern enum calling_abi ix86_cfun_abi (void);
+extern enum calling_abi ix86_function_type_abi (const_tree);
extern void ix86_call_abi_override (const_tree);
extern tree ix86_fn_abi_va_list (tree);
extern tree ix86_canonical_va_list_type (tree);
diff --git a/gcc-4.4.3/gcc/config/i386/i386.c b/gcc-4.4.3/gcc/config/i386/i386.c
index 350b214ed..fa148be65 100644
--- a/gcc-4.4.3/gcc/config/i386/i386.c
+++ b/gcc-4.4.3/gcc/config/i386/i386.c
@@ -1036,6 +1036,79 @@ struct processor_costs core2_cost = {
1, /* cond_not_taken_branch_cost. */
};
+static const
+struct processor_costs atom_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {15, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{24, loop}, {32, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
/* Generic64 should produce code tuned for Nocona and K8. */
static const
struct processor_costs generic64_cost = {
@@ -1194,6 +1267,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
+#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
@@ -1231,10 +1305,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_486 | m_PENT,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
+ m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
+ | m_CORE2 | m_GENERIC,
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
- m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
+ m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
@@ -1246,12 +1321,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
~m_386,
/* X86_TUNE_USE_SAHF */
- m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
@@ -1271,13 +1346,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
- ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
+ ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
@@ -1292,8 +1367,8 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
- | m_GENERIC /* | m_PENT4 ? */,
+ m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
+ | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
/* X86_TUNE_FAST_PREFIX */
~(m_PENT | m_486 | m_386),
@@ -1317,26 +1392,28 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_PPRO,
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
- m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
+ | m_CORE2 | m_GENERIC,
/* X86_TUNE_ADD_ESP_8 */
- m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SUB_ESP_4 */
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
+ | m_GENERIC,
/* X86_TUNE_SUB_ESP_8 */
- m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+ ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
| m_GENERIC | m_GEODE),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
@@ -1347,7 +1424,8 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
- m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
+ m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
+ | m_AMDFAM10,
/* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
m_AMDFAM10,
@@ -1365,13 +1443,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_PPRO | m_PENT4 | m_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
/* X86_TUNE_EPILOGUE_USING_MOVE */
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
/* X86_TUNE_SHIFT1 */
~m_486,
@@ -1387,22 +1465,25 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
+ | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
+ | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
- ~(m_PENT4 | m_NOCONA | m_GENERIC),
+ ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
/* X86_TUNE_PAD_RETURNS */
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
/* X86_TUNE_EXT_80387_CONSTANTS */
- m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
+ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
+ | m_CORE2 | m_GENERIC,
/* X86_TUNE_SHORTEN_X87_SSE */
~m_K8,
@@ -1447,6 +1528,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
m_CORE2,
+
+ /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
+ will impact LEA instruction selection. */
+ m_ATOM,
};
/* Feature tests against the various architecture variations. */
@@ -1472,10 +1557,11 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
};
static const unsigned int x86_accumulate_outgoing_args
- = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+ = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+ | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
+ = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
static enum stringop_alg stringop_alg = no_stringop;
@@ -1743,6 +1829,9 @@ static unsigned int ix86_default_incoming_stack_boundary;
/* Alignment for incoming stack boundary in bits. */
unsigned int ix86_incoming_stack_boundary;
+/* The abi used by target. */
+enum calling_abi ix86_abi;
+
/* Values 1-5: see jump.c */
int ix86_branch_cost;
@@ -1818,6 +1907,9 @@ static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
+static unsigned int ix86_minimum_incoming_stack_boundary (bool);
+
+static enum calling_abi ix86_function_abi (const_tree);
/* The svr4 ABI for the i386 says that records and unions are returned
@@ -1868,6 +1960,8 @@ static int ix86_isa_flags_explicit;
(OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
(OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
+#define OPTION_MASK_ISA_LWP_SET \
+ OPTION_MASK_ISA_LWP
/* AES and PCLMUL need SSE2 because they use xmm registers */
#define OPTION_MASK_ISA_AES_SET \
@@ -1877,9 +1971,11 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_ABM_SET \
(OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
+
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
/* Define a set of ISAs which aren't available when a given ISA is
disabled. MMX and SSE ISAs are handled separately. */
@@ -1915,12 +2011,14 @@ static int ix86_isa_flags_explicit;
#define OPTION_MASK_ISA_SSE4A_UNSET \
(OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
+#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
/* Vectorization library interface and handlers. */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
@@ -1953,7 +2051,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
{&core2_cost, 16, 10, 16, 10, 16},
{&generic32_cost, 16, 7, 16, 7, 16},
{&generic64_cost, 16, 10, 16, 10, 16},
- {&amdfam10_cost, 32, 24, 32, 7, 32}
+ {&amdfam10_cost, 32, 24, 32, 7, 32},
+ {&atom_cost, 16, 7, 16, 7, 16}
};
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
@@ -1971,6 +2070,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
"prescott",
"nocona",
"core2",
+ "atom",
"geode",
"k6",
"k6-2",
@@ -2157,6 +2257,19 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
}
return true;
+ case OPT_mlwp:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
+ }
+ return true;
+
case OPT_mabm:
if (value)
{
@@ -2209,6 +2322,19 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
}
return true;
+ case OPT_mmovbe:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
+ }
+ return true;
+
case OPT_maes:
if (value)
{
@@ -2259,6 +2385,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{
{ "-m64", OPTION_MASK_ISA_64BIT },
{ "-msse5", OPTION_MASK_ISA_SSE5 },
+ { "-mlwp", OPTION_MASK_ISA_LWP },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -2271,6 +2398,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{ "-mmmx", OPTION_MASK_ISA_MMX },
{ "-mabm", OPTION_MASK_ISA_ABM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
+ { "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-maes", OPTION_MASK_ISA_AES },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
};
@@ -2488,7 +2616,9 @@ override_options (bool main_args_p)
PTA_AES = 1 << 17,
PTA_PCLMUL = 1 << 18,
PTA_AVX = 1 << 19,
- PTA_FMA = 1 << 20
+ PTA_FMA = 1 << 20,
+ PTA_LWP = 1 << 21,
+ PTA_MOVBE = 1 << 22
};
static struct pta
@@ -2530,6 +2660,9 @@ override_options (bool main_args_p)
{"core2", PROCESSOR_CORE2, CPU_CORE2,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16},
+ {"atom", PROCESSOR_ATOM, CPU_ATOM,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
@@ -2717,6 +2850,20 @@ override_options (bool main_args_p)
error ("bad value (%s) for %sarch=%s %s",
ix86_arch_string, prefix, suffix, sw);
+ /* Validate -mabi= value. */
+ if (ix86_abi_string)
+ {
+ if (strcmp (ix86_abi_string, "sysv") == 0)
+ ix86_abi = SYSV_ABI;
+ else if (strcmp (ix86_abi_string, "ms") == 0)
+ ix86_abi = MS_ABI;
+ else
+ error ("unknown ABI (%s) for %sabi=%s %s",
+ ix86_abi_string, prefix, suffix, sw);
+ }
+ else
+ ix86_abi = DEFAULT_ABI;
+
if (ix86_cmodel_string != 0)
{
if (!strcmp (ix86_cmodel_string, "small"))
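
How the new ix86_abi default interacts with the per-function attributes that the following hunks consult (declarations are illustrative; ms_abi and sysv_abi are the existing x86-64 GCC function attributes):

void uses_default_abi (void);                        /* follows -mabi= */
void forced_ms (void) __attribute__ ((ms_abi));      /* overrides to MS */
void forced_sysv (void) __attribute__ ((sysv_abi));  /* overrides to SysV */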
@@ -2817,6 +2964,9 @@ override_options (bool main_args_p)
if (processor_alias_table[i].flags & PTA_SSE5
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
+ if (processor_alias_table[i].flags & PTA_LWP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP;
if (processor_alias_table[i].flags & PTA_ABM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
ix86_isa_flags |= OPTION_MASK_ISA_ABM;
@@ -2829,6 +2979,9 @@ override_options (bool main_args_p)
if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
+ if (processor_alias_table[i].flags & PTA_MOVBE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
if (processor_alias_table[i].flags & PTA_AES
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
ix86_isa_flags |= OPTION_MASK_ISA_AES;
@@ -3092,12 +3245,10 @@ override_options (bool main_args_p)
if (ix86_force_align_arg_pointer == -1)
ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
+ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
/* Validate -mincoming-stack-boundary= value or default it to
MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
- if (ix86_force_align_arg_pointer)
- ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
- else
- ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
if (ix86_incoming_stack_boundary_string)
{
@@ -3503,6 +3654,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
IX86_ATTR_ISA ("sse4a", OPT_msse4a),
IX86_ATTR_ISA ("sse5", OPT_msse5),
IX86_ATTR_ISA ("ssse3", OPT_mssse3),
+ IX86_ATTR_ISA ("lwp", OPT_mlwp),
/* string options */
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
@@ -4147,7 +4299,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
/* If we need to align the outgoing stack, then sibcalling would
unalign the stack, which may break the called function. */
- if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
+ if (ix86_minimum_incoming_stack_boundary (true)
+ < PREFERRED_STACK_BOUNDARY)
return false;
if (decl)
@@ -4608,14 +4761,14 @@ ix86_function_arg_regno_p (int regno)
default ABI. */
/* RAX is used as hidden argument to va_arg functions. */
- if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
+ if (ix86_abi == SYSV_ABI && regno == AX_REG)
return true;
- if (DEFAULT_ABI == MS_ABI)
+ if (ix86_abi == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
- for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
+ for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
: X86_64_REGPARM_MAX); i++)
if (regno == parm_regs[i])
return true;
@@ -4643,7 +4796,7 @@ ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
- int call_abi = SYSV_ABI;
+ enum calling_abi call_abi = SYSV_ABI;
if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
call_abi = ix86_function_abi (fndecl);
else
@@ -4655,37 +4808,39 @@ ix86_reg_parm_stack_space (const_tree fndecl)
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
call abi used. */
-int
+enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
if (TARGET_64BIT && fntype != NULL)
{
- int abi;
- if (DEFAULT_ABI == SYSV_ABI)
- abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
- else
- abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
-
+ enum calling_abi abi = ix86_abi;
+ if (abi == SYSV_ABI)
+ {
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
+ abi = MS_ABI;
+ }
+ else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
+ abi = SYSV_ABI;
return abi;
}
- return DEFAULT_ABI;
+ return ix86_abi;
}
-int
+static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
if (! fndecl)
- return DEFAULT_ABI;
+ return ix86_abi;
return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
call abi used. */
-int
+enum calling_abi
ix86_cfun_abi (void)
{
if (! cfun || ! TARGET_64BIT)
- return DEFAULT_ABI;
+ return ix86_abi;
return cfun->machine->call_abi;
}
@@ -4699,7 +4854,7 @@ void
ix86_call_abi_override (const_tree fndecl)
{
if (fndecl == NULL_TREE)
- cfun->machine->call_abi = DEFAULT_ABI;
+ cfun->machine->call_abi = ix86_abi;
else
cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
@@ -4740,8 +4895,8 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
cum->nregs = ix86_regparm;
if (TARGET_64BIT)
{
- if (cum->call_abi != DEFAULT_ABI)
- cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
+ if (cum->call_abi != ix86_abi)
+ cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
: X64_REGPARM_MAX;
}
if (TARGET_SSE)
@@ -4749,8 +4904,8 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
cum->sse_nregs = SSE_REGPARM_MAX;
if (TARGET_64BIT)
{
- if (cum->call_abi != DEFAULT_ABI)
- cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ if (cum->call_abi != ix86_abi)
+ cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
: X64_SSE_REGPARM_MAX;
}
}
@@ -5716,7 +5871,7 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
if (type)
mode = type_natural_mode (type, NULL);
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
else if (TARGET_64BIT)
function_arg_advance_64 (cum, mode, type, words, named);
@@ -5862,9 +6017,9 @@ function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
if (mode == VOIDmode)
return GEN_INT (cum->maybe_vaarg
? (cum->sse_nregs < 0
- ? (cum->call_abi == DEFAULT_ABI
+ ? (cum->call_abi == ix86_abi
? SSE_REGPARM_MAX
- : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
: X64_SSE_REGPARM_MAX))
: cum->sse_regno)
: -1);
@@ -5958,7 +6113,7 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
if (type && TREE_CODE (type) == VECTOR_TYPE)
mode = type_natural_mode (type, cum);
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
return function_arg_ms_64 (cum, mode, omode, named, bytes);
else if (TARGET_64BIT)
return function_arg_64 (cum, mode, omode, type, named);
@@ -5978,7 +6133,7 @@ ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
const_tree type, bool named ATTRIBUTE_UNUSED)
{
/* See Windows x64 Software Convention. */
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
{
int msize = (int) GET_MODE_SIZE (mode);
if (type)
@@ -6118,7 +6273,7 @@ ix86_function_value_regno_p (int regno)
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
- if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ if (TARGET_64BIT && ix86_abi == MS_ABI)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
@@ -6514,13 +6669,13 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi)
static tree
ix86_build_builtin_va_list (void)
{
- tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
+ tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
/* Initialize abi specific va_list builtin types. */
if (TARGET_64BIT)
{
tree t;
- if (DEFAULT_ABI == MS_ABI)
+ if (ix86_abi == MS_ABI)
{
t = ix86_build_builtin_va_list_abi (SYSV_ABI);
if (TREE_CODE (t) != RECORD_TYPE)
@@ -6534,7 +6689,7 @@ ix86_build_builtin_va_list (void)
t = build_variant_type_copy (t);
sysv_va_list_type_node = t;
}
- if (DEFAULT_ABI != MS_ABI)
+ if (ix86_abi != MS_ABI)
{
t = ix86_build_builtin_va_list_abi (MS_ABI);
if (TREE_CODE (t) != RECORD_TYPE)
@@ -6567,8 +6722,8 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
int i;
int regparm = ix86_regparm;
- if (cum->call_abi != DEFAULT_ABI)
- regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
+ if (cum->call_abi != ix86_abi)
+ regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* GPR size of varargs save area. */
if (cfun->va_list_gpr_size)
@@ -6721,7 +6876,7 @@ is_va_list_char_pointer (tree type)
return true;
canonic = ix86_canonical_va_list_type (type);
return (canonic == ms_va_list_type_node
- || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
+ || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start. */
@@ -6965,7 +7120,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
}
if (need_temp)
{
- int i;
+ int i, prev_size = 0;
tree temp = create_tmp_var (type, "va_arg_tmp");
/* addr = &temp; */
@@ -6977,13 +7132,29 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
rtx slot = XVECEXP (container, 0, i);
rtx reg = XEXP (slot, 0);
enum machine_mode mode = GET_MODE (reg);
- tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
- tree addr_type = build_pointer_type (piece_type);
- tree daddr_type = build_pointer_type_for_mode (piece_type,
- ptr_mode, true);
+ tree piece_type;
+ tree addr_type;
+ tree daddr_type;
tree src_addr, src;
int src_offset;
tree dest_addr, dest;
+ int cur_size = GET_MODE_SIZE (mode);
+
+ if (prev_size + cur_size > size)
+ {
+ cur_size = size - prev_size;
+ mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
+ if (mode == BLKmode)
+ mode = QImode;
+ }
+ piece_type = lang_hooks.types.type_for_mode (mode, 1);
+ if (mode == GET_MODE (reg))
+ addr_type = build_pointer_type (piece_type);
+ else
+ addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+ true);
+ daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+ true);
if (SSE_REGNO_P (REGNO (reg)))
{
@@ -6998,14 +7169,26 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
src_addr = fold_convert (addr_type, src_addr);
src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
size_int (src_offset));
- src = build_va_arg_indirect_ref (src_addr);
dest_addr = fold_convert (daddr_type, addr);
dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
size_int (INTVAL (XEXP (slot, 1))));
- dest = build_va_arg_indirect_ref (dest_addr);
+ if (cur_size == GET_MODE_SIZE (mode))
+ {
+ src = build_va_arg_indirect_ref (src_addr);
+ dest = build_va_arg_indirect_ref (dest_addr);
- gimplify_assign (dest, src, pre_p);
+ gimplify_assign (dest, src, pre_p);
+ }
+ else
+ {
+ tree copy
+ = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ 3, dest_addr, src_addr,
+ size_int (cur_size));
+ gimplify_and_add (copy, pre_p);
+ }
+ prev_size += cur_size;
}
}
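For context, a hypothetical aggregate (not from this patch) that would take the new partial-piece path above: a 12-byte struct occupies two eightbytes on x86-64, and the second register piece has only four valid bytes, so cur_size is clamped and the tail is copied via BUILT_IN_MEMCPY rather than a full-width indirect store.

/* Sketch only: va_arg on a 12-byte aggregate.  */
struct s12 { int a, b, c; };

int
sum_s12 (int n, ...)
{
  __builtin_va_list ap;
  struct s12 v;

  __builtin_va_start (ap, n);
  v = __builtin_va_arg (ap, struct s12); /* 8-byte piece, then 4-byte memcpy tail */
  __builtin_va_end (ap);
  return v.a + v.b + v.c;
}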
@@ -8088,37 +8271,58 @@ find_drap_reg (void)
}
}
-/* Update incoming stack boundary and estimated stack alignment. */
+/* Return minimum incoming stack alignment. */
-static void
-ix86_update_stack_boundary (void)
+static unsigned int
+ix86_minimum_incoming_stack_boundary (bool sibcall)
{
+ unsigned int incoming_stack_boundary;
+
/* Prefer the one specified at command line. */
- ix86_incoming_stack_boundary
- = (ix86_user_incoming_stack_boundary
- ? ix86_user_incoming_stack_boundary
- : ix86_default_incoming_stack_boundary);
+ if (ix86_user_incoming_stack_boundary)
+ incoming_stack_boundary = ix86_user_incoming_stack_boundary;
+  /* In 32bit, use MIN_STACK_BOUNDARY for the incoming stack boundary
+     when -mstackrealign is used and the estimated stack alignment is
+     128bit, but not for the sibcall check.  */
+ else if (!sibcall
+ && !TARGET_64BIT
+ && ix86_force_align_arg_pointer
+ && crtl->stack_alignment_estimated == 128)
+ incoming_stack_boundary = MIN_STACK_BOUNDARY;
+ else
+ incoming_stack_boundary = ix86_default_incoming_stack_boundary;
/* Incoming stack alignment can be changed on individual functions
via force_align_arg_pointer attribute. We use the smallest
incoming stack boundary. */
- if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
+ if (incoming_stack_boundary > MIN_STACK_BOUNDARY
&& lookup_attribute (ix86_force_align_arg_pointer_string,
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
- ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
+ incoming_stack_boundary = MIN_STACK_BOUNDARY;
/* The incoming stack frame has to be aligned at least at
parm_stack_boundary. */
- if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
- ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
+ if (incoming_stack_boundary < crtl->parm_stack_boundary)
+ incoming_stack_boundary = crtl->parm_stack_boundary;
/* Stack at entrance of main is aligned by runtime. We use the
smallest incoming stack boundary. */
- if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
+ if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
&& DECL_NAME (current_function_decl)
&& MAIN_NAME_P (DECL_NAME (current_function_decl))
&& DECL_FILE_SCOPE_P (current_function_decl))
- ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+ incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+
+ return incoming_stack_boundary;
+}
+
+/* Update incoming stack boundary and estimated stack alignment. */
+
+static void
+ix86_update_stack_boundary (void)
+{
+ ix86_incoming_stack_boundary
+ = ix86_minimum_incoming_stack_boundary (false);
/* x86_64 vararg needs 16byte stack alignment for register save
area. */
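As a hypothetical aside (not part of this patch): the per-function attribute consulted by ix86_minimum_incoming_stack_boundary is the pre-existing force_align_arg_pointer extension, which declares that a function may be entered on a stack with only word alignment.

/* Sketch: callers may enter with a merely 4-byte-aligned stack, so the
   prologue must realign before any 16-byte (SSE) slots are used.  */
void __attribute__ ((force_align_arg_pointer))
legacy_callback (void)
{
  /* ... */
}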
@@ -13228,6 +13432,316 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
emit_move_insn (operands[0], dst);
}
+#define LEA_SEARCH_THRESHOLD 12
+
+/* Search backward for non-agu definition of register number REGNO1
+ or register number REGNO2 in INSN's basic block until
+   1. LEA_SEARCH_THRESHOLD instructions have been scanned, or
+   2. the BB boundary is reached, or
+   3. an agu definition is reached.
+ Returns the distance between the non-agu definition point and INSN.
+ If no definition point, returns -1. */
+
+static int
+distance_non_agu_define (unsigned int regno1, unsigned int regno2,
+ rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ enum attr_type insn_type;
+
+ if (insn != BB_HEAD (bb))
+ {
+ rtx prev = PREV_INSN (insn);
+ while (prev && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA)
+ goto done;
+ }
+ }
+ if (prev == BB_HEAD (bb))
+ break;
+ prev = PREV_INSN (prev);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD)
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (e->src == bb)
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx prev = BB_END (bb);
+ while (prev
+ && prev != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA)
+ goto done;
+ }
+ }
+ prev = PREV_INSN (prev);
+ }
+ }
+ }
+
+ distance = -1;
+
+done:
+ /* get_attr_type may modify recog data. We want to make sure
+ that recog data is valid for instruction INSN, on which
+ distance_non_agu_define is called. INSN is unchanged here. */
+ extract_insn_cached (insn);
+ return distance;
+}
+
+/* Return the distance between INSN and the next insn that uses
+   register number REGNO0 in a memory address.  Return -1 if no such
+   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
+
+static int
+distance_agu_use (unsigned int regno0, rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ df_ref *use_rec;
+
+ if (insn != BB_END (bb))
+ {
+ rtx next = NEXT_INSN (insn);
+ while (next && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if OP0 is used in memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if OP0 is set in NEXT. */
+ return -1;
+ }
+ }
+ if (next == BB_END (bb))
+ break;
+ next = NEXT_INSN (next);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD)
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->dest == bb)
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx next = BB_HEAD (bb);
+ while (next
+ && next != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if OP0 is used in memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if OP0 is set in NEXT. */
+ return -1;
+ }
+
+ }
+ next = NEXT_INSN (next);
+ }
+ }
+ }
+
+ return -1;
+}
+
+/* Define this macro to tune LEA priority vs ADD; it takes effect
+   when there is a choice between LEA and ADD.
+   Negative value: ADD is preferred over LEA
+   Zero: Neutral
+   Positive value: LEA is preferred over ADD  */
+#define IX86_LEA_PRIORITY 2
+
+/* Return true if it is ok to optimize an ADD operation to an LEA
+   operation, to avoid consuming the flags register.  On processors
+   like ATOM, if the destination register of the LEA holds an actual
+   address that will be used soon, LEA is better; otherwise ADD
+   is better.  */
+
+bool
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ rtx insn, rtx operands[])
+{
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = true_regnum (operands[1]);
+ unsigned int regno2;
+
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return regno0 != regno1;
+
+ regno2 = true_regnum (operands[2]);
+
+  /* If a = b + c with a != b and a != c, the lea form must be used.  */
+ if (regno0 != regno1 && regno0 != regno2)
+ return true;
+ else
+ {
+ int dist_define, dist_use;
+ dist_define = distance_non_agu_define (regno1, regno2, insn);
+ if (dist_define <= 0)
+ return true;
+
+      /* If this insn has both a backward non-agu dependence and a
+	 forward agu dependence, the one with the shorter distance
+	 takes effect.  */
+ dist_use = distance_agu_use (regno0, insn);
+ if (dist_use <= 0
+ || (dist_define + IX86_LEA_PRIORITY) < dist_use)
+ return false;
+
+ return true;
+ }
+}
+
+/* Return true if destination reg of SET_BODY is shift count of
+ USE_BODY. */
+
+static bool
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
+{
+ rtx set_dest;
+ rtx shift_rtx;
+ int i;
+
+ /* Retrieve destination of SET_BODY. */
+ switch (GET_CODE (set_body))
+ {
+ case SET:
+ set_dest = SET_DEST (set_body);
+ if (!set_dest || !REG_P (set_dest))
+ return false;
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
+ use_body))
+ return true;
+ default:
+ return false;
+ break;
+ }
+
+ /* Retrieve shift count of USE_BODY. */
+ switch (GET_CODE (use_body))
+ {
+ case SET:
+ shift_rtx = XEXP (use_body, 1);
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (set_body,
+ XVECEXP (use_body, 0, i)))
+ return true;
+ default:
+ return false;
+ break;
+ }
+
+ if (shift_rtx
+ && (GET_CODE (shift_rtx) == ASHIFT
+ || GET_CODE (shift_rtx) == LSHIFTRT
+ || GET_CODE (shift_rtx) == ASHIFTRT
+ || GET_CODE (shift_rtx) == ROTATE
+ || GET_CODE (shift_rtx) == ROTATERT))
+ {
+ rtx shift_count = XEXP (shift_rtx, 1);
+
+ /* Return true if shift count is dest of SET_BODY. */
+ if (REG_P (shift_count)
+ && true_regnum (set_dest) == true_regnum (shift_count))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if destination reg of SET_INSN is shift count of
+ USE_INSN. */
+
+bool
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
+{
+ return ix86_dep_by_shift_count_body (PATTERN (set_insn),
+ PATTERN (use_insn));
+}
+
/* Return TRUE or FALSE depending on whether the unary operator meets the
appropriate constraints. */
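To make the ix86_lea_for_add_ok heuristic concrete, a hypothetical fragment (not from this patch); the comments are an interpretation of the intended -mtune=atom behavior, not compiler output.

int
pick_lea_or_add (int *base, int ofs)
{
  /* The sum becomes an address that is dereferenced almost at once, so
     keeping the lea (an AGU op whose result feeds the AGU) pays off.  */
  int *p = base + ofs;
  int x = *p;

  /* Here a sum is consumed only by the ALU; splitting to a plain add
     avoids the extra AGU-to-ALU forwarding latency on Atom.  */
  return x + ofs;
}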
@@ -19084,7 +19598,7 @@ ix86_init_machine_status (void)
f = GGC_CNEW (struct machine_function);
f->use_fast_prologue_epilogue_nregs = -1;
f->tls_descriptor_call_expanded_p = 0;
- f->call_abi = DEFAULT_ABI;
+ f->call_abi = ix86_abi;
return f;
}
@@ -19345,6 +19859,7 @@ ix86_issue_rate (void)
switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
+ case PROCESSOR_ATOM:
case PROCESSOR_K6:
return 2;
@@ -19411,41 +19926,21 @@ ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
return 1;
}
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
- address with operands set by DEP_INSN. */
+/* Return true iff USE_INSN has a memory address with operands set by
+ SET_INSN. */
-static int
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+bool
+ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
- rtx addr;
-
- if (insn_type == TYPE_LEA
- && TARGET_PENTIUM)
- {
- addr = PATTERN (insn);
-
- if (GET_CODE (addr) == PARALLEL)
- addr = XVECEXP (addr, 0, 0);
-
- gcc_assert (GET_CODE (addr) == SET);
-
- addr = SET_SRC (addr);
- }
- else
- {
- int i;
- extract_insn_cached (insn);
- for (i = recog_data.n_operands - 1; i >= 0; --i)
- if (MEM_P (recog_data.operand[i]))
- {
- addr = XEXP (recog_data.operand[i], 0);
- goto found;
- }
- return 0;
- found:;
- }
-
- return modified_in_p (addr, dep_insn);
+ int i;
+ extract_insn_cached (use_insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ {
+ rtx addr = XEXP (recog_data.operand[i], 0);
+ return modified_in_p (addr, set_insn) != 0;
+ }
+ return false;
}
static int
@@ -19473,7 +19968,20 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
case PROCESSOR_PENTIUM:
/* Address Generation Interlock adds a cycle of latency. */
- if (ix86_agi_dependent (insn, dep_insn, insn_type))
+ if (insn_type == TYPE_LEA)
+ {
+ rtx addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ if (modified_in_p (addr, dep_insn))
+ cost += 1;
+ }
+ else if (ix86_agi_dependent (dep_insn, insn))
cost += 1;
/* ??? Compares pair with jump/setcc. */
@@ -19483,7 +19991,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
/* Floating point stores require value to be ready one cycle earlier. */
if (insn_type == TYPE_FMOV
&& get_attr_memory (insn) == MEMORY_STORE
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
cost += 1;
break;
@@ -19506,7 +20014,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
{
/* Claim moves to take one cycle, as core can issue one load
at time and the next load can start cycle later. */
@@ -19535,7 +20043,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
{
/* Claim moves to take one cycle, as core can issue one load
at time and the next load can start cycle later. */
@@ -19552,6 +20060,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
+ case PROCESSOR_ATOM:
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
memory = get_attr_memory (insn);
@@ -19560,7 +20069,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
{
enum attr_unit unit = get_attr_unit (insn);
int loadcost = 3;
@@ -20815,6 +21324,14 @@ enum ix86_builtins
IX86_BUILTIN_PCOMFALSEQ,
IX86_BUILTIN_PCOMTRUEQ,
+ /* LWP instructions. */
+ IX86_BUILTIN_LLWPCB,
+ IX86_BUILTIN_SLWPCB,
+ IX86_BUILTIN_LWPVAL32,
+ IX86_BUILTIN_LWPVAL64,
+ IX86_BUILTIN_LWPINS32,
+ IX86_BUILTIN_LWPINS64,
+
IX86_BUILTIN_MAX
};
@@ -20998,6 +21515,8 @@ enum ix86_special_builtin_type
{
SPECIAL_FTYPE_UNKNOWN,
VOID_FTYPE_VOID,
+ VOID_FTYPE_PVOID,
+ PVOID_FTYPE_VOID,
V32QI_FTYPE_PCCHAR,
V16QI_FTYPE_PCCHAR,
V8SF_FTYPE_PCV4SF,
@@ -21027,7 +21546,13 @@ enum ix86_special_builtin_type
VOID_FTYPE_PV8SF_V8SF_V8SF,
VOID_FTYPE_PV4DF_V4DF_V4DF,
VOID_FTYPE_PV4SF_V4SF_V4SF,
- VOID_FTYPE_PV2DF_V2DF_V2DF
+ VOID_FTYPE_PV2DF_V2DF_V2DF,
+ VOID_FTYPE_USHORT_UINT_USHORT,
+ VOID_FTYPE_UINT_UINT_UINT,
+ VOID_FTYPE_UINT64_UINT_UINT,
+ UCHAR_FTYPE_USHORT_UINT_USHORT,
+ UCHAR_FTYPE_UINT_UINT_UINT,
+ UCHAR_FTYPE_UINT64_UINT_UINT
};
/* Builtin types */
@@ -21266,6 +21791,14 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
+
};
/* Builtins with variable number of arguments. */
@@ -22255,6 +22788,7 @@ ix86_init_mmx_sse_builtins (void)
NULL_TREE);
tree unsigned_ftype_void
= build_function_type (unsigned_type_node, void_list_node);
+
tree v2si_ftype_v4sf
= build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
/* Loads/stores. */
@@ -22915,6 +23449,56 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DI_type_node, NULL_TREE);
+ /* LWP instructions. */
+
+ tree pvoid_ftype_void
+ = build_function_type (ptr_type_node, void_list_node);
+
+ tree void_ftype_pvoid
+ = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+
+ tree void_ftype_ushort_unsigned_ushort
+ = build_function_type_list (void_type_node,
+ short_unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+
+ tree void_ftype_unsigned_unsigned_unsigned
+ = build_function_type_list (void_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree void_ftype_uint64_unsigned_unsigned
+ = build_function_type_list (void_type_node,
+ long_long_unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_ushort_unsigned_ushort
+ = build_function_type_list (unsigned_char_type_node,
+ short_unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_unsigned_unsigned_unsigned
+ = build_function_type_list (unsigned_char_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
+ tree uchar_ftype_uint64_unsigned_unsigned
+ = build_function_type_list (unsigned_char_type_node,
+ long_long_unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+
tree ftype;
/* Add all special builtins with variable number of operands. */
@@ -23022,6 +23606,31 @@ ix86_init_mmx_sse_builtins (void)
case VOID_FTYPE_PV2DF_V2DF_V2DF:
type = void_ftype_pv2df_v2df_v2df;
break;
+ case VOID_FTYPE_USHORT_UINT_USHORT:
+ type = void_ftype_ushort_unsigned_ushort;
+ break;
+ case VOID_FTYPE_UINT_UINT_UINT:
+ type = void_ftype_unsigned_unsigned_unsigned;
+ break;
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ type = void_ftype_uint64_unsigned_unsigned;
+ break;
+ case VOID_FTYPE_PVOID:
+ type = void_ftype_pvoid;
+ break;
+ case PVOID_FTYPE_VOID:
+ type = pvoid_ftype_void;
+ break;
+ case UCHAR_FTYPE_USHORT_UINT_USHORT:
+ type = uchar_ftype_ushort_unsigned_ushort;
+ break;
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ type = uchar_ftype_unsigned_unsigned_unsigned;
+ break;
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ type = uchar_ftype_uint64_unsigned_unsigned;
+ break;
+
default:
gcc_unreachable ();
}
@@ -24803,7 +25412,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
rtx op;
enum machine_mode mode;
- } args[2];
+ } args[3];
enum insn_code icode = d->icode;
bool last_arg_constant = false;
const struct insn_data *insn_p = &insn_data[icode];
@@ -24824,6 +25433,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V4DF_FTYPE_PCV2DF:
case V4DF_FTYPE_PCDOUBLE:
case V2DF_FTYPE_PCDOUBLE:
+ case VOID_FTYPE_PVOID:
nargs = 1;
klass = load;
memory = 0;
@@ -24867,6 +25477,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case VOID_FTYPE_UINT_UINT_UINT:
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ nargs = 3;
+ klass = load;
+ memory = ARRAY_SIZE (args);
+ last_arg_constant = true;
+ break;
default:
gcc_unreachable ();
}
@@ -24903,12 +25522,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
if (last_arg_constant && (i + 1) == nargs)
{
if (!match)
- switch (icode)
- {
- default:
+ {
+ if (icode == CODE_FOR_lwp_lwpvalsi3
+ || icode == CODE_FOR_lwp_lwpinssi3
+ || icode == CODE_FOR_lwp_lwpvaldi3
+ || icode == CODE_FOR_lwp_lwpinsdi3)
+ error ("the last argument must be a 32-bit immediate");
+ else
error ("the last argument must be an 8-bit immediate");
- return const0_rtx;
- }
+ return const0_rtx;
+ }
}
else
{
@@ -24943,6 +25566,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case 2:
pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
break;
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
+ break;
default:
gcc_unreachable ();
}
@@ -25240,6 +25866,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return target;
}
+ case IX86_BUILTIN_LLWPCB:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_lwp_llwpcb;
+ if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ emit_insn (gen_lwp_llwpcb (op0));
+ return 0;
+
+ case IX86_BUILTIN_SLWPCB:
+ icode = CODE_FOR_lwp_slwpcb;
+ if (!target
+ || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+ emit_insn (gen_lwp_slwpcb (target));
+ return target;
+
default:
break;
}
@@ -29840,14 +30483,11 @@ x86_builtin_vectorization_cost (bool runtime_test)
tree
ix86_fn_abi_va_list (tree fndecl)
{
- int abi;
-
if (!TARGET_64BIT)
return va_list_type_node;
gcc_assert (fndecl != NULL_TREE);
- abi = ix86_function_abi ((const_tree) fndecl);
- if (abi == MS_ABI)
+ if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
return ms_va_list_type_node;
else
return sysv_va_list_type_node;
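A hypothetical user-level view of the machinery converted from DEFAULT_ABI to the ix86_abi variable above; ms_abi and sysv_abi are pre-existing GCC attributes that ix86_function_type_abi resolves against the command-line default.

/* Sketch: one translation unit mixing calling conventions on x86-64.  */
long __attribute__ ((ms_abi))   win_entry  (long a, long b);
long __attribute__ ((sysv_abi)) unix_entry (long a, long b);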
diff --git a/gcc-4.4.3/gcc/config/i386/i386.h b/gcc-4.4.3/gcc/config/i386/i386.h
index abdc7d019..e9014b79a 100644
--- a/gcc-4.4.3/gcc/config/i386/i386.h
+++ b/gcc-4.4.3/gcc/config/i386/i386.h
@@ -55,10 +55,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_SSE5 OPTION_ISA_SSE5
+#define TARGET_LWP OPTION_ISA_LWP
#define TARGET_ROUND OPTION_ISA_ROUND
#define TARGET_ABM OPTION_ISA_ABM
#define TARGET_POPCNT OPTION_ISA_POPCNT
#define TARGET_SAHF OPTION_ISA_SAHF
+#define TARGET_MOVBE OPTION_ISA_MOVBE
#define TARGET_AES OPTION_ISA_AES
#define TARGET_PCLMUL OPTION_ISA_PCLMUL
#define TARGET_CMPXCHG16B OPTION_ISA_CX16
@@ -236,6 +238,7 @@ extern const struct processor_costs ix86_size_cost;
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
+#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
@@ -300,6 +303,7 @@ enum ix86_tune_indices {
X86_TUNE_USE_VECTOR_FP_CONVERTS,
X86_TUNE_USE_VECTOR_CONVERTS,
X86_TUNE_FUSE_CMP_AND_BRANCH,
+ X86_TUNE_OPT_AGU,
X86_TUNE_LAST
};
@@ -387,6 +391,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
#define TARGET_FUSE_CMP_AND_BRANCH \
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
+#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
@@ -470,7 +475,10 @@ enum calling_abi
MS_ABI = 1
};
-/* The default abi form used by target. */
+/* The abi used by target. */
+extern enum calling_abi ix86_abi;
+
+/* The default abi used by target. */
#define DEFAULT_ABI SYSV_ABI
/* Subtargets may reset this to 1 in order to enable 96-bit long double
@@ -569,6 +577,7 @@ enum target_cpu_default
TARGET_CPU_DEFAULT_prescott,
TARGET_CPU_DEFAULT_nocona,
TARGET_CPU_DEFAULT_core2,
+ TARGET_CPU_DEFAULT_atom,
TARGET_CPU_DEFAULT_geode,
TARGET_CPU_DEFAULT_k6,
@@ -658,7 +667,7 @@ enum target_cpu_default
/* Boundary (in *bits*) on which stack pointer should be aligned. */
#define STACK_BOUNDARY \
- (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
+ (TARGET_64BIT && ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
/* Stack boundary of the main function guaranteed by OS. */
#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
@@ -679,9 +688,7 @@ enum target_cpu_default
generate an alternate prologue and epilogue that realigns the
runtime stack if nessary. This supports mixing codes that keep a
4-byte aligned stack, as specified by i386 psABI, with codes that
- need a 16-byte aligned stack, as required by SSE instructions. If
- STACK_REALIGN_DEFAULT is 1 and PREFERRED_STACK_BOUNDARY_DEFAULT is
- 128, stacks for all functions may be realigned. */
+ need a 16-byte aligned stack, as required by SSE instructions. */
#define STACK_REALIGN_DEFAULT 0
/* Boundary (in *bits*) on which the incoming stack is aligned. */
@@ -1584,7 +1591,7 @@ typedef struct ix86_args {
int maybe_vaarg; /* true for calls to possibly vardic fncts. */
int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should
be passed in SSE registers. Otherwise 0. */
- int call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
+ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
MS_ABI for ms abi. */
} CUMULATIVE_ARGS;
@@ -2230,6 +2237,7 @@ enum processor_type
PROCESSOR_GENERIC32,
PROCESSOR_GENERIC64,
PROCESSOR_AMDFAM10,
+ PROCESSOR_ATOM,
PROCESSOR_max
};
@@ -2410,7 +2418,7 @@ struct machine_function GTY(())
int tls_descriptor_call_expanded_p;
/* This value is used for amd64 targets and specifies the current abi
to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */
- int call_abi;
+ enum calling_abi call_abi;
struct machine_cfa_state cfa;
};
#endif
diff --git a/gcc-4.4.3/gcc/config/i386/i386.md b/gcc-4.4.3/gcc/config/i386/i386.md
index 22c891c49..bbe915112 100644
--- a/gcc-4.4.3/gcc/config/i386/i386.md
+++ b/gcc-4.4.3/gcc/config/i386/i386.md
@@ -227,6 +227,11 @@
(UNSPECV_CLD 15)
(UNSPECV_VZEROALL 16)
(UNSPECV_VZEROUPPER 17)
+ (UNSPECV_VSWAPMOV 18)
+ (UNSPECV_LLWP_INTRINSIC 19)
+ (UNSPECV_SLWP_INTRINSIC 20)
+ (UNSPECV_LWPVAL_INTRINSIC 21)
+ (UNSPECV_LWPINS_INTRINSIC 22)
])
;; Constants to represent pcomtrue/pcomfalse variants
@@ -315,7 +320,7 @@
;; Processor type.
-(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
generic64,amdfam10"
(const (symbol_ref "ix86_schedule")))
@@ -329,9 +334,9 @@
push,pop,call,callv,leave,
str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
- sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+ sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
- ssemuladd,sse4arg,
+ ssemuladd,sse4arg,lwp,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -344,7 +349,7 @@
(define_attr "unit" "integer,i387,sse,mmx,unknown"
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "i387")
- (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+ (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
(const_string "sse")
@@ -524,7 +529,7 @@
;; if the instruction is complex.
(define_attr "memory" "none,load,store,both,unknown"
- (cond [(eq_attr "type" "other,multi,str")
+ (cond [(eq_attr "type" "other,multi,str,lwp")
(const_string "unknown")
(eq_attr "type" "lea,fcmov,fpspc")
(const_string "none")
@@ -611,6 +616,12 @@
(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
(const_string "any"))
+;; Define attribute to classify add/sub insns that consume the carry flag (CF)
+(define_attr "use_carry" "0,1" (const_string "0"))
+
+;; Define attribute to indicate unaligned ssemov insns
+(define_attr "movu" "0,1" (const_string "0"))
+
;; Describe a user's asm statement.
(define_asm_attributes
[(set_attr "length" "128")
@@ -673,6 +684,9 @@
;; Single word integer modes without QImode.
(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+;; Single word integer modes without QImode and HImode.
+(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
+
;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
@@ -726,6 +740,7 @@
(include "k6.md")
(include "athlon.md")
(include "geode.md")
+(include "atom.md")
;; Operand and operator predicates and constraints
@@ -5789,6 +5804,7 @@
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
"adc{q}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "DI")])
@@ -5863,6 +5879,7 @@
"ix86_binary_operator_ok (PLUS, QImode, operands)"
"adc{b}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "QI")])
@@ -5875,6 +5892,7 @@
"ix86_binary_operator_ok (PLUS, HImode, operands)"
"adc{w}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "HI")])
@@ -5887,6 +5905,7 @@
"ix86_binary_operator_ok (PLUS, SImode, operands)"
"adc{l}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
@@ -5900,6 +5919,7 @@
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
"adc{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
@@ -6129,9 +6149,9 @@
(set_attr "mode" "SI")])
(define_insn "*adddi_1_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
- (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
- (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
{
@@ -6152,6 +6172,10 @@
}
default:
+ /* Use add as much as possible to replace lea for AGU optimization. */
+ if (which_alternative == 2 && TARGET_OPT_AGU)
+ return "add{q}\t{%1, %0|%0, %1}";
+
gcc_assert (rtx_equal_p (operands[0], operands[1]));
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
@@ -6170,8 +6194,11 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "2")
+ (cond [(and (eq_attr "alternative" "2")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
(const_string "lea")
+ (eq_attr "alternative" "3")
+ (const_string "lea")
; Current assemblers are broken and do not allow @GOTOFF in
; ought but a memory context.
(match_operand:DI 2 "pic_symbolic_operand" "")
@@ -6188,8 +6215,8 @@
(plus:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "x86_64_nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ "TARGET_64BIT && reload_completed
+ && ix86_lea_for_add_ok (PLUS, insn, operands)"
[(set (match_dup 0)
(plus:DI (match_dup 1)
(match_dup 2)))]
@@ -6393,9 +6420,9 @@
(define_insn "*addsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
- (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
- (match_operand:SI 2 "general_operand" "g,ri,li")))
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
+ (match_operand:SI 2 "general_operand" "g,ri,0,li")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, SImode, operands)"
{
@@ -6416,6 +6443,10 @@
}
default:
+ /* Use add as much as possible to replace lea for AGU optimization. */
+ if (which_alternative == 2 && TARGET_OPT_AGU)
+ return "add{l}\t{%1, %0|%0, %1}";
+
gcc_assert (rtx_equal_p (operands[0], operands[1]));
/* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
@@ -6432,7 +6463,10 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "2")
+ (cond [(and (eq_attr "alternative" "2")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
+ (const_string "lea")
+ (eq_attr "alternative" "3")
(const_string "lea")
; Current assemblers are broken and do not allow @GOTOFF in
; ought but a memory context.
@@ -6450,8 +6484,7 @@
(plus (match_operand 1 "register_operand" "")
(match_operand 2 "nonmemory_operand" "")))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)"
[(const_int 0)]
{
rtx pat;
@@ -7552,6 +7585,7 @@
"TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
"sbb{q}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "DI")])
@@ -7600,6 +7634,7 @@
"ix86_binary_operator_ok (MINUS, QImode, operands)"
"sbb{b}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "QI")])
@@ -7612,6 +7647,7 @@
"ix86_binary_operator_ok (MINUS, HImode, operands)"
"sbb{w}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "HI")])
@@ -7624,6 +7660,7 @@
"ix86_binary_operator_ok (MINUS, SImode, operands)"
"sbb{l}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
@@ -15162,7 +15199,7 @@
? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
operands[0], const0_rtx,
GEN_INT ((TARGET_64BIT
- ? (DEFAULT_ABI == SYSV_ABI
+ ? (ix86_abi == SYSV_ABI
? X86_64_SSE_REGPARM_MAX
: X64_SSE_REGPARM_MAX)
: X86_32_SSE_REGPARM_MAX)
@@ -15242,6 +15279,7 @@
"reload_completed"
"ret"
[(set_attr "length" "1")
+ (set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
(set_attr "modrm" "0")])
@@ -15254,6 +15292,7 @@
"reload_completed"
"rep\;ret"
[(set_attr "length" "1")
+ (set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
(set_attr "prefix_rep" "1")
(set_attr "modrm" "0")])
@@ -15264,6 +15303,7 @@
"reload_completed"
"ret\t%0"
[(set_attr "length" "3")
+ (set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "2")
(set_attr "modrm" "0")])
@@ -15612,7 +15652,7 @@
(bswap:SI (match_operand:SI 1 "register_operand" "")))]
""
{
- if (!TARGET_BSWAP)
+ if (!(TARGET_BSWAP || TARGET_MOVBE))
{
rtx x = operands[0];
@@ -15624,6 +15664,21 @@
}
})
+(define_insn "*bswapsi_movbe"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m")
+ (bswap:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,r")))]
+ "TARGET_MOVBE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ bswap\t%0
+ movbe\t{%1, %0|%0, %1}
+ movbe\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,imov,imov")
+ (set_attr "modrm" "*,1,1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "length" "2,*,*")
+ (set_attr "mode" "SI")])
+
(define_insn "*bswapsi_1"
[(set (match_operand:SI 0 "register_operand" "=r")
(bswap:SI (match_operand:SI 1 "register_operand" "0")))]
@@ -15652,7 +15707,29 @@
[(set_attr "length" "4")
(set_attr "mode" "HI")])
-(define_insn "bswapdi2"
+(define_expand "bswapdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (bswap:DI (match_operand:DI 1 "register_operand" "")))]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*bswapdi_movbe"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
+ (bswap:DI (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))]
+ "TARGET_64BIT && TARGET_MOVBE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ bswap\t%0
+ movbe\t{%1, %0|%0, %1}
+ movbe\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,imov,imov")
+ (set_attr "modrm" "*,1,1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "length" "3,*,*")
+ (set_attr "mode" "DI")])
+
+(define_insn "*bswapdi_1"
[(set (match_operand:DI 0 "register_operand" "=r")
(bswap:DI (match_operand:DI 1 "register_operand" "0")))]
"TARGET_64BIT"
@@ -16380,6 +16457,7 @@
"TARGET_SSE_MATH"
"%vrcpss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")])
@@ -16731,6 +16809,7 @@
"TARGET_SSE_MATH"
"%vrsqrtss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")])
@@ -16751,6 +16830,7 @@
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
"%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "*")
@@ -19807,6 +19887,7 @@
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -19822,6 +19903,7 @@
""
"sbb{q}\t%0, %0"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -19865,6 +19947,7 @@
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -19880,6 +19963,7 @@
""
"sbb{l}\t%0, %0"
[(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
(set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
@@ -20212,7 +20296,8 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "0")
+ (cond [(and (eq_attr "alternative" "0")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
(const_string "alu")
(match_operand:SI 2 "const0_operand" "")
(const_string "imov")
@@ -20255,7 +20340,8 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "0")
+ (cond [(and (eq_attr "alternative" "0")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
(const_string "alu")
(match_operand:DI 2 "const0_operand" "")
(const_string "imov")
@@ -21753,6 +21839,7 @@
return patterns[locality];
}
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
(set_attr "memory" "none")])
(define_insn "*prefetch_sse_rex"
@@ -21771,6 +21858,7 @@
return patterns[locality];
}
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
(set_attr "memory" "none")])
(define_insn "*prefetch_3dnow"
@@ -21972,6 +22060,93 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; LWP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "lwp_llwpcb"
+ [(unspec_volatile [(match_operand 0 "register_operand" "r")]
+ UNSPECV_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "")
+
+(define_insn "*lwp_llwpcb<mode>1"
+ [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
+ UNSPECV_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "5")])
+
+(define_expand "lwp_slwpcb"
+ [(set (match_operand 0 "register_operand" "=r")
+ (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
+ "TARGET_LWP"
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_lwp_slwpcbdi (operands[0]));
+ else
+ emit_insn (gen_lwp_slwpcbsi (operands[0]));
+ DONE;
+ })
+
+(define_insn "lwp_slwpcb<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "5")])
+
+(define_expand "lwp_lwpval<mode>3"
+ [(unspec_volatile [(match_operand:SWI48 1 "register_operand" "r")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")
+ (match_operand:SI 3 "const_int_operand" "i")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "/* Avoid unused variable warning. */
+ (void) operand0;")
+
+(define_insn "*lwp_lwpval<mode>3_1"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length")
+ (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+
+(define_expand "lwp_lwpins<mode>3"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand" "r")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")
+ (match_operand:SI 3 "const_int_operand" "i")]
+ UNSPECV_LWPINS_INTRINSIC))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
+ "TARGET_LWP"
+ "")
+
+(define_insn "*lwp_lwpins<mode>3_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPECV_LWPINS_INTRINSIC))]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length")
+ (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
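A hypothetical probe of the new movbe patterns (enabled by the -mmovbe option added to i386.opt below): a byte swap fused with a load can match the memory alternative of *bswapsi_movbe instead of a separate load plus register bswap.

/* Sketch: with -mmovbe this may compile to a single movbe load;
   without it, a mov followed by a register bswap is emitted.  */
unsigned int
load_be32 (const unsigned int *p)
{
  return __builtin_bswap32 (*p);
}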
diff --git a/gcc-4.4.3/gcc/config/i386/i386.opt b/gcc-4.4.3/gcc/config/i386/i386.opt
index 05727be7b..c2562e48d 100644
--- a/gcc-4.4.3/gcc/config/i386/i386.opt
+++ b/gcc-4.4.3/gcc/config/i386/i386.opt
@@ -232,6 +232,10 @@ mtune=
Target RejectNegative Joined Var(ix86_tune_string)
Schedule code for given CPU
+mabi=
+Target RejectNegative Joined Var(ix86_abi_string)
+Generate code that conforms to the given ABI
+
mveclibabi=
Target RejectNegative Joined Var(ix86_veclibabi_string)
Vector library ABI to use
@@ -323,6 +327,10 @@ msse5
Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists Save
Support SSE5 built-in functions and code generation
+mlwp
+Target Report Mask(ISA_LWP) Var(ix86_isa_flags) VarExists Save
+Support LWP built-in functions and code generation
+
mabm
Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save
Support code generation of Advanced Bit Manipulation (ABM) instructions.
@@ -339,6 +347,10 @@ msahf
Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save
Support code generation of sahf instruction in 64bit x86-64 code.
+mmovbe
+Target Report Mask(ISA_MOVBE) Var(ix86_isa_flags) VarExists Save
+Support code generation of movbe instruction.
+
maes
Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save
Support AES built-in functions and code generation
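Hypothetical invocations exercising the options added above; the flag spellings come from this diff, while the sysv/ms argument values for -mabi= are an assumption based on the enum names.

/* Assumed usage, shown as a comment since only the flags matter here:
     gcc -O2 -mmovbe -mlwp test.c        enable the new ISA features
     gcc -O2 -mabi=ms   -c win_code.c    override the default calling ABI
     gcc -O2 -mabi=sysv -c unix_code.c  */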
diff --git a/gcc-4.4.3/gcc/config/i386/linux-unwind.h b/gcc-4.4.3/gcc/config/i386/linux-unwind.h
index 89f238a6e..d73f354ba 100644
--- a/gcc-4.4.3/gcc/config/i386/linux-unwind.h
+++ b/gcc-4.4.3/gcc/config/i386/linux-unwind.h
@@ -106,7 +106,7 @@ x86_64_fallback_frame_state (struct _Unwind_Context *context,
signal-turned-exceptions for them. There's also no configure-run for
the target, so we can't check on (e.g.) HAVE_SYS_UCONTEXT_H. Using the
target libc version macro should be enough. */
-#if !(__GLIBC__ == 2 && __GLIBC_MINOR__ == 0)
+#if defined __GLIBC__ && !(__GLIBC__ == 2 && __GLIBC_MINOR__ == 0)
#include <signal.h>
#include <sys/ucontext.h>
diff --git a/gcc-4.4.3/gcc/config/i386/linux.h b/gcc-4.4.3/gcc/config/i386/linux.h
index 4f19b20ea..f3a98c26a 100644
--- a/gcc-4.4.3/gcc/config/i386/linux.h
+++ b/gcc-4.4.3/gcc/config/i386/linux.h
@@ -107,7 +107,7 @@ along with GCC; see the file COPYING3. If not see
#undef ASM_SPEC
#define ASM_SPEC \
- "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*} \
+ "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*} --32 \
%{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
/* These may be provided by config/linux-grtev2.h. */
diff --git a/gcc-4.4.3/gcc/config/i386/linux64.h b/gcc-4.4.3/gcc/config/i386/linux64.h
index 9fe1d0aca..17d972cfb 100644
--- a/gcc-4.4.3/gcc/config/i386/linux64.h
+++ b/gcc-4.4.3/gcc/config/i386/linux64.h
@@ -64,11 +64,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2"
-#undef ASM_SPEC
-#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} \
- %{Wa,*:%*} %{m32:--32} %{m64:--64} \
- %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
-
#if TARGET_64BIT_DEFAULT
#define SPEC_32 "m32"
#define SPEC_64 "!m32"
@@ -77,6 +72,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define SPEC_64 "m64"
#endif
+#undef ASM_SPEC
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} \
+ %{Wa,*:%*} %{" SPEC_32 ":--32} %{" SPEC_64 ":--64} \
+ %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
+
#undef LINK_SPEC
#define LINK_SPEC "%{" SPEC_64 ":-m elf_x86_64} %{" SPEC_32 ":-m elf_i386} \
%{shared:-shared} \
diff --git a/gcc-4.4.3/gcc/config/i386/lwpintrin.h b/gcc-4.4.3/gcc/config/i386/lwpintrin.h
new file mode 100644
index 000000000..954b039e5
--- /dev/null
+++ b/gcc-4.4.3/gcc/config/i386/lwpintrin.h
@@ -0,0 +1,100 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _LWPINTRIN_H_INCLUDED
+#define _LWPINTRIN_H_INCLUDED
+
+#ifndef __LWP__
+# error "LWP instruction set not enabled"
+#else
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb (pcbAddress);
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb (void)
+{
+ return __builtin_ia32_slwpcb ();
+}
+
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval32 (data2, data1, flags);
+}
+
+#ifdef __x86_64__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval64 (data2, data1, flags);
+}
+#endif
+#else
+#define __lwpval32(D2, D1, F) \
+ (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#ifdef __x86_64__
+#define __lwpval64(D2, D1, F) \
+ (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#endif
+#endif
+
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins32 (data2, data1, flags);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins64 (data2, data1, flags);
+}
+#endif
+#else
+#define __lwpins32(D2, D1, F) \
+ (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#ifdef __x86_64__
+#define __lwpins64(D2, D1, F) \
+ (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#endif
+#endif
+
+#endif /* __LWP__ */
+
+#endif /* _LWPINTRIN_H_INCLUDED */
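A hypothetical end-to-end use of the new header (compile with -mlwp and include x86intrin.h, per the guards above); the disable-by-NULL convention is an assumption from the LWP specification, not something this patch defines.

#include <x86intrin.h>

void
lwp_demo (void *pcb)
{
  __llwpcb (pcb);                /* install the control block         */
  __lwpval32 (42u, 7u, 0u);      /* insert a user event, flags = 0    */
  if (__lwpins32 (42u, 7u, 0u))  /* nonzero: the event ring was full  */
    __llwpcb ((void *) 0);       /* assumed: a NULL PCB disables LWP  */
  (void) __slwpcb ();            /* read back the active PCB address  */
}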
diff --git a/gcc-4.4.3/gcc/config/i386/mingw32.h b/gcc-4.4.3/gcc/config/i386/mingw32.h
index f3fbe8c58..746d7d105 100644
--- a/gcc-4.4.3/gcc/config/i386/mingw32.h
+++ b/gcc-4.4.3/gcc/config/i386/mingw32.h
@@ -38,7 +38,7 @@ along with GCC; see the file COPYING3. If not see
builtin_define_std ("WINNT"); \
builtin_define_with_int_value ("_INTEGRAL_MAX_BITS", \
TYPE_PRECISION (intmax_type_node));\
- if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) \
+ if (TARGET_64BIT && ix86_abi == MS_ABI) \
{ \
builtin_define ("__MINGW64__"); \
builtin_define_std ("WIN64"); \
diff --git a/gcc-4.4.3/gcc/config/i386/ppro.md b/gcc-4.4.3/gcc/config/i386/ppro.md
index 5e163d829..20f457ab1 100644
--- a/gcc-4.4.3/gcc/config/i386/ppro.md
+++ b/gcc-4.4.3/gcc/config/i386/ppro.md
@@ -731,7 +731,7 @@
(define_insn_reservation "ppro_insn" 1
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "none,unknown")
- (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
"decodern,(p0|p1)")
;; read-modify and register-memory instructions have 2 or three uops,
@@ -739,13 +739,13 @@
(define_insn_reservation "ppro_insn_load" 3
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "load")
- (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
"decoder0,p2+(p0|p1)")
(define_insn_reservation "ppro_insn_store" 1
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "store")
- (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
"decoder0,(p0|p1),p4+p3")
;; read-modify-store instructions produce 4 uops so they have to be
@@ -753,6 +753,6 @@
(define_insn_reservation "ppro_insn_both" 4
(and (eq_attr "cpu" "pentiumpro")
(and (eq_attr "memory" "both")
- (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
"decoder0,p2+(p0|p1),p4+p3")
diff --git a/gcc-4.4.3/gcc/config/i386/sse.md b/gcc-4.4.3/gcc/config/i386/sse.md
index ea5fb3440..cae9eed3b 100644
--- a/gcc-4.4.3/gcc/config/i386/sse.md
+++ b/gcc-4.4.3/gcc/config/i386/sse.md
@@ -342,6 +342,7 @@
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
@@ -367,6 +368,7 @@
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "mode" "<MODE>")])
(define_insn "avx_movdqu<avxmodesuffix>"
@@ -377,6 +379,7 @@
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"vmovdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<avxvecmode>")])
@@ -387,6 +390,7 @@
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -428,7 +432,7 @@
UNSPEC_MOVNT))]
"TARGET_SSE2"
"movntdq\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -438,7 +442,7 @@
UNSPEC_MOVNT))]
"TARGET_SSE2"
"movnti\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "mode" "V2DF")])
(define_insn "avx_lddqu<avxmodesuffix>"
@@ -449,6 +453,7 @@
"TARGET_AVX"
"vlddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<avxvecmode>")])
@@ -458,7 +463,8 @@
UNSPEC_LDDQU))]
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
(set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
@@ -765,6 +771,7 @@
"TARGET_SSE"
"%vrcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
@@ -791,6 +798,7 @@
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
(set_attr "mode" "SF")])
(define_expand "sqrtv8sf2"
@@ -836,6 +844,7 @@
"TARGET_SSE"
"%vsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
@@ -880,6 +889,7 @@
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
"sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "rsqrtv8sf2"
@@ -1043,7 +1053,7 @@
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
"<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "sseadd")
(set_attr "mode" "<ssescalarmode>")])
;; These versions of the min/max patterns implement exactly the operations
@@ -1179,6 +1189,7 @@
"TARGET_SSE3"
"addsubpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "atom_unit" "complex")
(set_attr "mode" "V2DF")])
(define_insn "avx_h<plusminus_insn>v4df3"
@@ -1302,6 +1313,7 @@
"TARGET_SSE3"
"h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
@@ -5069,6 +5081,7 @@
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmaddwd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -7018,7 +7031,7 @@
vpsrldq\t{$8, %1, %0|%0, %1, 8}
vmovq\t{%H1, %0|%0, %H1}
vmov{q}\t{%H1, %0|%0, %H1}"
- [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+ [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
(set_attr "memory" "*,none,*,*")
(set_attr "prefix" "vex")
(set_attr "mode" "V2SF,TI,TI,DI")])
@@ -7034,7 +7047,7 @@
psrldq\t{$8, %0|%0, 8}
movq\t{%H1, %0|%0, %H1}
mov{q}\t{%H1, %0|%0, %H1}"
- [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+ [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
(set_attr "memory" "*,none,*,*")
(set_attr "mode" "V2SF,TI,TI,DI")])
@@ -7050,7 +7063,7 @@
vmovhps\t{%1, %0|%0, %1}
vpsrldq\t{$8, %1, %0|%0, %1, 8}
vmovq\t{%H1, %0|%0, %H1}"
- [(set_attr "type" "ssemov,sseishft,ssemov")
+ [(set_attr "type" "ssemov,sseishft1,ssemov")
(set_attr "memory" "*,none,*")
(set_attr "prefix" "vex")
(set_attr "mode" "V2SF,TI,TI")])
@@ -7066,7 +7079,7 @@
movhps\t{%1, %0|%0, %1}
psrldq\t{$8, %0|%0, 8}
movq\t{%H1, %0|%0, %H1}"
- [(set_attr "type" "ssemov,sseishft,ssemov")
+ [(set_attr "type" "ssemov,sseishft1,ssemov")
(set_attr "memory" "*,none,*")
(set_attr "mode" "V2SF,TI,TI")])
@@ -7624,6 +7637,7 @@
"TARGET_SSE2"
"psadbw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
@@ -7645,7 +7659,7 @@
UNSPEC_MOVMSK))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
"%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
@@ -7655,7 +7669,7 @@
UNSPEC_MOVMSK))]
"TARGET_SSE2"
"%vpmovmskb\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
@@ -7678,7 +7692,7 @@
"TARGET_SSE2 && !TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
"%vmaskmovdqu\t{%2, %1|%1, %2}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -7692,7 +7706,7 @@
"TARGET_SSE2 && TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
"%vmaskmovdqu\t{%2, %1|%1, %2}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -7703,6 +7717,7 @@
"TARGET_SSE"
"%vldmxcsr\t%0"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "mxcsr")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "load")])
@@ -7712,6 +7727,7 @@
"TARGET_SSE"
"%vstmxcsr\t%0"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "mxcsr")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "store")])
@@ -7730,6 +7746,7 @@
"TARGET_SSE || TARGET_3DNOW_A"
"sfence"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
(define_insn "sse2_clflush"
@@ -7738,6 +7755,7 @@
"TARGET_SSE2"
"clflush\t%a0"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
(define_expand "sse2_mfence"
@@ -7755,6 +7773,7 @@
"TARGET_64BIT || TARGET_SSE2"
"mfence"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
(define_expand "sse2_lfence"
@@ -7772,6 +7791,7 @@
"TARGET_SSE2"
"lfence"
[(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "lfence")
(set_attr "memory" "unknown")])
(define_insn "sse3_mwait"
@@ -7895,6 +7915,7 @@
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -7923,6 +7944,7 @@
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -7977,6 +7999,7 @@
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -7997,6 +8020,7 @@
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8083,6 +8107,7 @@
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8111,6 +8136,7 @@
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8197,6 +8223,7 @@
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8225,6 +8252,7 @@
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8279,6 +8307,7 @@
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8299,6 +8328,7 @@
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8385,6 +8415,7 @@
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8413,6 +8444,7 @@
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8519,6 +8551,7 @@
"TARGET_SSSE3"
"pmaddubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8557,6 +8590,7 @@
"TARGET_SSSE3"
"pmaddubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8764,6 +8798,7 @@
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
@@ -8780,6 +8815,7 @@
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
@@ -8966,7 +9002,7 @@
UNSPEC_MOVNTDQA))]
"TARGET_SSE4_1"
"%vmovntdqa\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
+ [(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
diff --git a/gcc-4.4.3/gcc/config/i386/ssemath.h b/gcc-4.4.3/gcc/config/i386/ssemath.h
new file mode 100644
index 000000000..357d6a378
--- /dev/null
+++ b/gcc-4.4.3/gcc/config/i386/ssemath.h
@@ -0,0 +1,25 @@
+/* Copyright (C) 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT (TARGET_SSE2 ? FPMATH_SSE : FPMATH_387)
+
+#undef TARGET_SUBTARGET32_ISA_DEFAULT
+#define TARGET_SUBTARGET32_ISA_DEFAULT \
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2)
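
A sketch of the intended effect, assuming a 32-bit configuration that lists ssemath.h among its tm headers: scalar float math then defaults to SSE (as if -msse2 -mfpmath=sse were given), so plain code like the following compiles to mulss instead of x87 fmul unless overridden on the command line.

    float
    halve (float x)
    {
      return x * 0.5f;
    }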
diff --git a/gcc-4.4.3/gcc/config/i386/x86intrin.h b/gcc-4.4.3/gcc/config/i386/x86intrin.h
index d848811d3..fead76635 100644
--- a/gcc-4.4.3/gcc/config/i386/x86intrin.h
+++ b/gcc-4.4.3/gcc/config/i386/x86intrin.h
@@ -56,6 +56,10 @@
#include <bmmintrin.h>
#endif
+#ifdef __LWP__
+#include <lwpintrin.h>
+#endif
+
#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
#endif
diff --git a/gcc-4.4.3/gcc/config/linux-android.h b/gcc-4.4.3/gcc/config/linux-android.h
index a43bab5b0..5ca3858a2 100644
--- a/gcc-4.4.3/gcc/config/linux-android.h
+++ b/gcc-4.4.3/gcc/config/linux-android.h
@@ -20,6 +20,12 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define ANDROID_TARGET_OS_CPP_BUILTINS() \
+ do { \
+ if (OPTION_ANDROID) \
+ builtin_define ("__ANDROID__"); \
+ } while (0)
+
#if ANDROID_DEFAULT
# define NOANDROID "mno-android"
#else
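
This factors the __ANDROID__ definition out of linux.h (removed in the next file's hunk) so any architecture's TARGET_OS_CPP_BUILTINS can invoke it. The user-side effect, for illustration (android/log.h and __android_log_print are NDK names assumed here, not part of the patch):

    #ifdef __ANDROID__
    # include <android/log.h>   /* NDK logging API, assumed */
    # define LOG(msg) __android_log_print (ANDROID_LOG_INFO, "demo", "%s", msg)
    #else
    # include <stdio.h>
    # define LOG(msg) puts (msg)
    #endif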
diff --git a/gcc-4.4.3/gcc/config/linux.h b/gcc-4.4.3/gcc/config/linux.h
index 443db22bd..740ddb7be 100644
--- a/gcc-4.4.3/gcc/config/linux.h
+++ b/gcc-4.4.3/gcc/config/linux.h
@@ -42,7 +42,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
provides part of the support for getting C++ file-scope static
object constructed before entering `main'. */
-#if defined HAVE_LD_PIE
+#if defined (HAVE_LD_PIE) && defined (ENABLE_CRTBEGINTS)
+#define LINUX_TARGET_STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} crti.o%s \
+ %{static:%{pie:crtbeginTS.o%s;:crtbeginT.o%s}} %{!static:%{shared|pie:crtbeginS.o%s;:crtbegin.o%s}}"
+#elif defined (HAVE_LD_PIE) && ! defined (ENABLE_CRTBEGINTS)
#define LINUX_TARGET_STARTFILE_SPEC \
"%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
@@ -96,8 +100,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
builtin_assert ("system=linux"); \
builtin_assert ("system=unix"); \
builtin_assert ("system=posix"); \
- if (OPTION_ANDROID) \
- builtin_define ("__ANDROID__"); \
} while (0)
#if defined(HAVE_LD_EH_FRAME_HDR)
diff --git a/gcc-4.4.3/gcc/config/mips/linux64.h b/gcc-4.4.3/gcc/config/mips/linux64.h
index 2f24dfa14..505d8b1d4 100644
--- a/gcc-4.4.3/gcc/config/mips/linux64.h
+++ b/gcc-4.4.3/gcc/config/mips/linux64.h
@@ -39,8 +39,10 @@ along with GCC; see the file COPYING3. If not see
#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld.so.1"
#define GLIBC_DYNAMIC_LINKERN32 "/lib32/ld.so.1"
#define UCLIBC_DYNAMIC_LINKERN32 "/lib32/ld-uClibc.so.0"
+#define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32"
#define LINUX_DYNAMIC_LINKERN32 \
- CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32)
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \
+ BIONIC_DYNAMIC_LINKERN32)
#undef LINK_SPEC
#define LINK_SPEC "\
diff --git a/gcc-4.4.3/gcc/config/rs6000/linux64.h b/gcc-4.4.3/gcc/config/rs6000/linux64.h
index 3ccbe21c8..7ecf98742 100644
--- a/gcc-4.4.3/gcc/config/rs6000/linux64.h
+++ b/gcc-4.4.3/gcc/config/rs6000/linux64.h
@@ -156,7 +156,7 @@ extern int dot_symbols;
#endif
#define ASM_SPEC32 "-a32 %{n} %{T} %{Ym,*} %{Yd,*} \
-%{mrelocatable} %{mrelocatable-lib} %{fpic:-K PIC} %{fPIC:-K PIC} \
+%{mrelocatable} %{mrelocatable-lib} %{fpic|fPIC|fpie|fPIE:-K PIC} \
%{memb} %{!memb: %{msdata: -memb} %{msdata=eabi: -memb}} \
%{!mlittle: %{!mlittle-endian: %{!mbig: %{!mbig-endian: \
%{mcall-freebsd: -mbig} \
diff --git a/gcc-4.4.3/gcc/config/rs6000/sysv4.h b/gcc-4.4.3/gcc/config/rs6000/sysv4.h
index 339d51847..d9407e97d 100644
--- a/gcc-4.4.3/gcc/config/rs6000/sysv4.h
+++ b/gcc-4.4.3/gcc/config/rs6000/sysv4.h
@@ -883,7 +883,12 @@ SVR4_ASM_SPEC \
%{!mnewlib: %{pthread:-lpthread} %{shared:-lc} \
%{!shared: %{profile:-lc_p} %{!profile:-lc}}}"
-#ifdef HAVE_LD_PIE
+#if defined (HAVE_LD_PIE) && defined (ENABLE_CRTBEGINTS)
+#define STARTFILE_LINUX_SPEC "\
+%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
+%{mnewlib:ecrti.o%s;:crti.o%s} \
+%{static:%{pie:crtbeginTS.o%s;:crtbeginT.o%s}} %{!static:%{shared|pie:crtbeginS.o%s;:crtbegin.o%s}}"
+#elif defined (HAVE_LD_PIE) && ! defined (ENABLE_CRTBEGINTS)
#define STARTFILE_LINUX_SPEC "\
%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
%{mnewlib:ecrti.o%s;:crti.o%s} \
diff --git a/gcc-4.4.3/gcc/config/sparc/sparc.c b/gcc-4.4.3/gcc/config/sparc/sparc.c
index d007ce551..b7d94f545 100644
--- a/gcc-4.4.3/gcc/config/sparc/sparc.c
+++ b/gcc-4.4.3/gcc/config/sparc/sparc.c
@@ -6104,7 +6104,7 @@ enum rtx_code
sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
{
const char *qpfunc;
- rtx slot0, slot1, result, tem, tem2;
+ rtx slot0, slot1, result, tem, tem2, libfunc;
enum machine_mode mode;
enum rtx_code new_comparison;
@@ -6167,7 +6167,8 @@ sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
emit_move_insn (slot1, y);
}
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
+ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
+ emit_library_call (libfunc, LCT_NORMAL,
DImode, 2,
XEXP (slot0, 0), Pmode,
XEXP (slot1, 0), Pmode);
@@ -6175,7 +6176,8 @@ sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
}
else
{
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
+ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
+ emit_library_call (libfunc, LCT_NORMAL,
SImode, 2,
x, TFmode, y, TFmode);
mode = SImode;
@@ -6186,7 +6188,7 @@ sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
register so reload doesn't clobber the value if it needs
the return register for a spill reg. */
result = gen_reg_rtx (mode);
- emit_move_insn (result, hard_libcall_value (mode));
+ emit_move_insn (result, hard_libcall_value (mode, libfunc));
switch (comparison)
{
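
Condensed, the sparc.c hunks all serve one pattern: keep the libcall SYMBOL_REF in a local so the same rtx feeds both the call emission and the return-value query, matching hard_libcall_value's new two-argument interface (which presumably consults a per-target libcall-value hook backported with this change). A sketch using names from the hunks above, surrounding code elided:

    rtx libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
    emit_library_call (libfunc, LCT_NORMAL, SImode, 2, x, TFmode, y, TFmode);
    result = gen_reg_rtx (SImode);
    emit_move_insn (result, hard_libcall_value (SImode, libfunc));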