author     Andrew Hsieh <andrewhsieh@google.com>    2014-06-18 13:00:04 -0700
committer  Pavel Chupin <pavel.v.chupin@intel.com>  2014-06-19 22:38:42 +0400
commit     f190d6284359da8ae8694b2d2e14b01602a959ed (patch)
tree       d4e0548e7cec02d60b1082368032e66a1c509a02 /gcc-4.8/gcc/config/i386/i386.c
parent     4ff2f42147bc128ce38789071d98e55844cd3a5e (diff)
Merge GCC 4.8.3
Change-Id: I0abe59f7705b3eccc6b2f123af75b2e30917696a
Diffstat (limited to 'gcc-4.8/gcc/config/i386/i386.c')
-rw-r--r--  gcc-4.8/gcc/config/i386/i386.c  614
1 file changed, 338 insertions(+), 276 deletions(-)
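Note: among the changes merged here, the ABI-change warnings for vector arguments and return values are consolidated in type_natural_mode and are now controlled by -Wpsabi. A minimal sketch of code that would trigger the reworked warning, assuming a 32-bit build with SSE disabled (e.g. -m32 -mno-sse); the example is illustrative and not part of the commit:

/* With -m32 -mno-sse, passing or returning a 16-byte vector by value
   changes the ABI, so the compiler warns under -Wpsabi:
   "SSE vector argument without SSE enabled changes the ABI" and
   "SSE vector return without SSE enabled changes the ABI".  */
typedef float v4sf __attribute__ ((vector_size (16)));

v4sf
add_vectors (v4sf a, v4sf b)
{
  return a + b;
}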
diff --git a/gcc-4.8/gcc/config/i386/i386.c b/gcc-4.8/gcc/config/i386/i386.c
index 98dc2e41e..0569828f3 100644
--- a/gcc-4.8/gcc/config/i386/i386.c
+++ b/gcc-4.8/gcc/config/i386/i386.c
@@ -1378,7 +1378,7 @@ struct processor_costs nocona_cost = {
8, /* MMX or SSE register to integer */
8, /* size of l1 cache. */
1024, /* size of l2 cache. */
- 128, /* size of prefetch block */
+ 64, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
@@ -1894,10 +1894,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,
+ m_COREI7 | m_HASWELL | m_AMDFAM10 | m_BDVER | m_BTVER,
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_COREI7 | m_BDVER,
+ m_COREI7 | m_HASWELL| m_BDVER,
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
m_BDVER ,
@@ -2409,6 +2409,7 @@ static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
+ const char *const name; /* processor name */
const struct processor_costs *cost; /* Processor costs */
const int align_loop; /* Default alignments. */
const int align_loop_max_skip;
@@ -2417,66 +2418,31 @@ struct ptt
const int align_func;
};
+/* This table must be in sync with enum processor_type in i386.h. */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
- {&i386_cost, 4, 3, 4, 3, 4},
- {&i486_cost, 16, 15, 16, 15, 16},
- {&pentium_cost, 16, 7, 16, 7, 16},
- {&pentiumpro_cost, 16, 15, 16, 10, 16},
- {&geode_cost, 0, 0, 0, 0, 0},
- {&k6_cost, 32, 7, 32, 7, 32},
- {&athlon_cost, 16, 7, 16, 7, 16},
- {&pentium4_cost, 0, 0, 0, 0, 0},
- {&k8_cost, 16, 7, 16, 7, 16},
- {&nocona_cost, 0, 0, 0, 0, 0},
- /* Core 2 */
- {&core_cost, 16, 10, 16, 10, 16},
- /* Core i7 */
- {&core_cost, 16, 10, 16, 10, 16},
- /* Core avx2 */
- {&core_cost, 16, 10, 16, 10, 16},
- {&generic32_cost, 16, 7, 16, 7, 16},
- {&generic64_cost, 16, 10, 16, 10, 16},
- {&amdfam10_cost, 32, 24, 32, 7, 32},
- {&bdver1_cost, 16, 10, 16, 7, 11},
- {&bdver2_cost, 16, 10, 16, 7, 11},
- {&bdver3_cost, 16, 10, 16, 7, 11},
- {&btver1_cost, 16, 10, 16, 7, 11},
- {&btver2_cost, 16, 10, 16, 7, 11},
- {&atom_cost, 16, 15, 16, 7, 16}
-};
-
-static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
-{
- "generic",
- "i386",
- "i486",
- "pentium",
- "pentium-mmx",
- "pentiumpro",
- "pentium2",
- "pentium3",
- "pentium4",
- "pentium-m",
- "prescott",
- "nocona",
- "core2",
- "corei7",
- "core-avx2",
- "atom",
- "geode",
- "k6",
- "k6-2",
- "k6-3",
- "athlon",
- "athlon-4",
- "k8",
- "amdfam10",
- "bdver1",
- "bdver2",
- "bdver3",
- "btver1",
- "btver2"
+ {"generic", &generic32_cost, 16, 7, 16, 7, 16},
+ {"generic", &generic64_cost, 16, 10, 16, 10, 16},
+ {"i386", &i386_cost, 4, 3, 4, 3, 4},
+ {"i486", &i486_cost, 16, 15, 16, 15, 16},
+ {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
+ {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
+ {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
+ {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
+ {"core2", &core_cost, 16, 10, 16, 10, 16},
+ {"corei7", &core_cost, 16, 10, 16, 10, 16},
+ {"core-avx2", &core_cost, 16, 10, 16, 10, 16},
+ {"atom", &atom_cost, 16, 15, 16, 7, 16},
+ {"geode", &geode_cost, 0, 0, 0, 0, 0},
+ {"k6", &k6_cost, 32, 7, 32, 7, 32},
+ {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
+ {"k8", &k8_cost, 16, 7, 16, 7, 16},
+ {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
+ {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
+ {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
+ {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
+ {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
+ {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
};
static bool
@@ -2921,7 +2887,7 @@ ix86_option_override_internal (bool main_args_p)
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
- | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
+ | PTA_FMA | PTA_MOVBE | PTA_HLE | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
@@ -2984,7 +2950,7 @@ ix86_option_override_internal (bool main_args_p)
{"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT},
@@ -3126,7 +3092,8 @@ ix86_option_override_internal (bool main_args_p)
ix86_tune_string = ix86_arch_string;
if (!ix86_tune_string)
{
- ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
+ ix86_tune_string
+ = processor_target_table[TARGET_CPU_DEFAULT].name;
ix86_tune_defaulted = 1;
}
@@ -4092,19 +4059,15 @@ ix86_function_specific_print (FILE *file, int indent,
= ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
NULL, NULL, ptr->x_ix86_fpmath, false);
+ gcc_assert (ptr->arch < PROCESSOR_max);
fprintf (file, "%*sarch = %d (%s)\n",
indent, "",
- ptr->arch,
- ((ptr->arch < TARGET_CPU_DEFAULT_max)
- ? cpu_names[ptr->arch]
- : "<unknown>"));
+ ptr->arch, processor_target_table[ptr->arch].name);
+ gcc_assert (ptr->tune < PROCESSOR_max);
fprintf (file, "%*stune = %d (%s)\n",
indent, "",
- ptr->tune,
- ((ptr->tune < TARGET_CPU_DEFAULT_max)
- ? cpu_names[ptr->tune]
- : "<unknown>"));
+ ptr->tune, processor_target_table[ptr->tune].name);
fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
@@ -5227,7 +5190,12 @@ ix86_function_regparm (const_tree type, const_tree decl)
/* Use register calling convention for local functions when possible. */
if (decl
&& TREE_CODE (decl) == FUNCTION_DECL
- && optimize
+ /* Caller and callee must agree on the calling convention, so
+ checking here just optimize means that with
+ __attribute__((optimize (...))) caller could use regparm convention
+ and callee not, or vice versa. Instead look at whether the callee
+ is optimized or not. */
+ && opt_for_fn (decl, optimize)
&& !(profile_flag && !flag_fentry))
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
@@ -5423,6 +5391,17 @@ ix86_legitimate_combined_insn (rtx insn)
bool win;
int j;
+ /* For pre-AVX disallow unaligned loads/stores where the
+ instructions don't support it. */
+ if (!TARGET_AVX
+ && VECTOR_MODE_P (GET_MODE (op))
+ && misaligned_operand (op, GET_MODE (op)))
+ {
+ int min_align = get_attr_ssememalign (insn);
+ if (min_align == 0)
+ return false;
+ }
+
/* A unary operator may be accepted by the predicate, but it
is irrelevant for matching constraints. */
if (UNARY_P (op))
@@ -5761,9 +5740,9 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
cum->nregs = 0;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
- cum->warn_avx = 0;
- cum->warn_sse = 0;
- cum->warn_mmx = 0;
+ cum->warn_avx = false;
+ cum->warn_sse = false;
+ cum->warn_mmx = false;
return;
}
@@ -5804,10 +5783,14 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
The midde-end can't deal with the vector types > 16 bytes. In this
case, we return the original mode and warn ABI change if CUM isn't
- NULL. */
+ NULL.
+
+ If INT_RETURN is true, warn ABI change if the vector mode isn't
+ available for function return value. */
static enum machine_mode
-type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
+type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
+ bool in_return)
{
enum machine_mode mode = TYPE_MODE (type);
@@ -5833,32 +5816,60 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
if (size == 32 && !TARGET_AVX)
{
static bool warnedavx;
+ static bool warnedavx_ret;
- if (cum
- && !warnedavx
- && cum->warn_avx)
+ if (cum && cum->warn_avx && !warnedavx)
{
+ if (warning (OPT_Wpsabi, "AVX vector argument "
+ "without AVX enabled changes the ABI"))
warnedavx = true;
- warning (0, "AVX vector argument without AVX "
- "enabled changes the ABI");
}
+ else if (in_return && !warnedavx_ret)
+ {
+ if (warning (OPT_Wpsabi, "AVX vector return "
+ "without AVX enabled changes the ABI"))
+ warnedavx_ret = true;
+ }
+
return TYPE_MODE (type);
}
- else if ((size == 8 || size == 16) && !TARGET_SSE)
+ else if (((size == 8 && TARGET_64BIT) || size == 16)
+ && !TARGET_SSE)
{
static bool warnedsse;
+ static bool warnedsse_ret;
- if (cum
- && !warnedsse
- && cum->warn_sse)
+ if (cum && cum->warn_sse && !warnedsse)
{
+ if (warning (OPT_Wpsabi, "SSE vector argument "
+ "without SSE enabled changes the ABI"))
warnedsse = true;
- warning (0, "SSE vector argument without SSE "
- "enabled changes the ABI");
}
- return mode;
+ else if (!TARGET_64BIT && in_return && !warnedsse_ret)
+ {
+ if (warning (OPT_Wpsabi, "SSE vector return "
+ "without SSE enabled changes the ABI"))
+ warnedsse_ret = true;
+ }
+ }
+ else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
+ {
+ static bool warnedmmx;
+ static bool warnedmmx_ret;
+
+ if (cum && cum->warn_mmx && !warnedmmx)
+ {
+ if (warning (OPT_Wpsabi, "MMX vector argument "
+ "without MMX enabled changes the ABI"))
+ warnedmmx = true;
+ }
+ else if (in_return && !warnedmmx_ret)
+ {
+ if (warning (OPT_Wpsabi, "MMX vector return "
+ "without MMX enabled changes the ABI"))
+ warnedmmx_ret = true;
+ }
}
- else
return mode;
}
@@ -6204,25 +6215,28 @@ classify_argument (enum machine_mode mode, const_tree type,
case CHImode:
case CQImode:
{
- int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
+ int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
+
+ /* Analyze last 128 bits only. */
+ size = (size - 1) & 0x7f;
- if (size <= 32)
+ if (size < 32)
{
classes[0] = X86_64_INTEGERSI_CLASS;
return 1;
}
- else if (size <= 64)
+ else if (size < 64)
{
classes[0] = X86_64_INTEGER_CLASS;
return 1;
}
- else if (size <= 64+32)
+ else if (size < 64+32)
{
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGERSI_CLASS;
return 2;
}
- else if (size <= 64+64)
+ else if (size < 64+64)
{
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
@@ -6489,7 +6503,7 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
if (n == 2
&& regclass[0] == X86_64_INTEGER_CLASS
&& regclass[1] == X86_64_INTEGER_CLASS
- && (mode == CDImode || mode == TImode || mode == TFmode)
+ && (mode == CDImode || mode == TImode)
&& intreg[0] + 1 == intreg[1])
return gen_rtx_REG (mode, intreg[0]);
@@ -6739,7 +6753,7 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
if (type)
- mode = type_natural_mode (type, NULL);
+ mode = type_natural_mode (type, NULL, false);
if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
@@ -6767,8 +6781,6 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
enum machine_mode orig_mode, const_tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
- static bool warnedsse, warnedmmx;
-
/* Avoid the AL settings for the Unix64 ABI. */
if (mode == VOIDmode)
return constm1_rtx;
@@ -6825,12 +6837,6 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V2DFmode:
if (!type || !AGGREGATE_TYPE_P (type))
{
- if (!TARGET_SSE && !warnedsse && cum->warn_sse)
- {
- warnedsse = true;
- warning (0, "SSE vector argument without SSE enabled "
- "changes the ABI");
- }
if (cum->sse_nregs)
return gen_reg_or_parallel (mode, orig_mode,
cum->sse_regno + FIRST_SSE_REG);
@@ -6863,12 +6869,6 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
- if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
- {
- warnedmmx = true;
- warning (0, "MMX vector argument without MMX enabled "
- "changes the ABI");
- }
if (cum->mmx_nregs)
return gen_reg_or_parallel (mode, orig_mode,
cum->mmx_regno + FIRST_MMX_REG);
@@ -6991,7 +6991,7 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
/* To simplify the code below, represent vector types with a vector mode
even if MMX/SSE are not active. */
if (type && TREE_CODE (type) == VECTOR_TYPE)
- mode = type_natural_mode (type, cum);
+ mode = type_natural_mode (type, cum, false);
if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
@@ -7249,9 +7249,15 @@ ix86_function_value_regno_p (const unsigned int regno)
switch (regno)
{
case AX_REG:
+ case DX_REG:
return true;
+ case DI_REG:
+ case SI_REG:
+ return TARGET_64BIT && ix86_abi != MS_ABI;
- case FIRST_FLOAT_REG:
+ /* Complex values are returned in %st(0)/%st(1) pair. */
+ case ST0_REG:
+ case ST1_REG:
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
@@ -7259,10 +7265,12 @@ ix86_function_value_regno_p (const unsigned int regno)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
- case FIRST_SSE_REG:
+ /* Complex values are returned in %xmm0/%xmm1 pair. */
+ case XMM0_REG:
+ case XMM1_REG:
return TARGET_SSE;
- case FIRST_MMX_REG:
+ case MM0_REG:
if (TARGET_MACHO || TARGET_64BIT)
return false;
return TARGET_MMX;
@@ -7434,7 +7442,7 @@ ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
enum machine_mode mode, orig_mode;
orig_mode = TYPE_MODE (valtype);
- mode = type_natural_mode (valtype, NULL);
+ mode = type_natural_mode (valtype, NULL, true);
return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
@@ -7549,7 +7557,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
#ifdef SUBTARGET_RETURN_IN_MEMORY
return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
- const enum machine_mode mode = type_natural_mode (type, NULL);
+ const enum machine_mode mode = type_natural_mode (type, NULL, true);
if (TARGET_64BIT)
{
@@ -7563,52 +7571,6 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
#endif
}
-/* When returning SSE vector types, we have a choice of either
- (1) being abi incompatible with a -march switch, or
- (2) generating an error.
- Given no good solution, I think the safest thing is one warning.
- The user won't be able to use -Werror, but....
-
- Choose the STRUCT_VALUE_RTX hook because that's (at present) only
- called in response to actually generating a caller or callee that
- uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
- via aggregate_value_p for general type probing from tree-ssa. */
-
-static rtx
-ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
-{
- static bool warnedsse, warnedmmx;
-
- if (!TARGET_64BIT && type)
- {
- /* Look at the return type of the function, not the function type. */
- enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
-
- if (!TARGET_SSE && !warnedsse)
- {
- if (mode == TImode
- || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- {
- warnedsse = true;
- warning (0, "SSE vector return without SSE enabled "
- "changes the ABI");
- }
- }
-
- if (!TARGET_MMX && !warnedmmx)
- {
- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
- {
- warnedmmx = true;
- warning (0, "MMX vector return without MMX enabled "
- "changes the ABI");
- }
- }
- }
-
- return NULL;
-}
-
/* Create the va_list data type. */
@@ -8033,7 +7995,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
size = int_size_in_bytes (type);
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- nat_mode = type_natural_mode (type, NULL);
+ nat_mode = type_natural_mode (type, NULL, false);
switch (nat_mode)
{
case V8SFmode:
@@ -10574,18 +10536,20 @@ ix86_expand_prologue (void)
}
m->fs.sp_offset += allocate;
+ /* Use stack_pointer_rtx for relative addressing so that code
+ works for realigned stack, too. */
if (r10_live && eax_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
gen_frame_mem (word_mode, t));
- t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
+ t = plus_constant (Pmode, t, UNITS_PER_WORD);
emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
gen_frame_mem (word_mode, t));
}
else if (eax_live || r10_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
emit_move_insn (gen_rtx_REG (word_mode,
(eax_live ? AX_REG : R10_REG)),
gen_frame_mem (word_mode, t));
@@ -11178,8 +11142,9 @@ ix86_expand_epilogue (int style)
m->fs.cfa_offset -= UNITS_PER_WORD;
m->fs.sp_offset -= UNITS_PER_WORD;
- add_reg_note (insn, REG_CFA_ADJUST_CFA,
- copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
add_reg_note (insn, REG_CFA_REGISTER,
gen_rtx_SET (VOIDmode, ecx, pc_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
@@ -11575,30 +11540,6 @@ ix86_live_on_entry (bitmap regs)
}
}
-/* Determine if op is suitable SUBREG RTX for address. */
-
-static bool
-ix86_address_subreg_operand (rtx op)
-{
- enum machine_mode mode;
-
- if (!REG_P (op))
- return false;
-
- mode = GET_MODE (op);
-
- if (GET_MODE_CLASS (mode) != MODE_INT)
- return false;
-
- /* Don't allow SUBREGs that span more than a word. It can lead to spill
- failures when the register is one word out of a two word structure. */
- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- return false;
-
- /* Allow only SUBREGs of non-eliminable hard registers. */
- return register_no_elim_operand (op, mode);
-}
-
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
@@ -11655,7 +11596,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
base = addr;
else if (GET_CODE (addr) == SUBREG)
{
- if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ if (REG_P (SUBREG_REG (addr)))
base = addr;
else
return 0;
@@ -11719,7 +11660,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
break;
case SUBREG:
- if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ if (!REG_P (SUBREG_REG (op)))
return 0;
/* FALLTHRU */
@@ -11764,19 +11705,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
scale = 1 << scale;
retval = -1;
}
- else if (CONST_INT_P (addr))
- {
- if (!x86_64_immediate_operand (addr, VOIDmode))
- return 0;
-
- /* Constant addresses are sign extended to 64bit, we have to
- prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
- if (TARGET_X32
- && val_signbit_known_set_p (SImode, INTVAL (addr)))
- return 0;
-
- disp = addr;
- }
else
disp = addr; /* displacement */
@@ -11785,18 +11713,12 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
if (REG_P (index))
;
else if (GET_CODE (index) == SUBREG
- && ix86_address_subreg_operand (SUBREG_REG (index)))
+ && REG_P (SUBREG_REG (index)))
;
else
return 0;
}
-/* Address override works only on the (%reg) part of %fs:(%reg). */
- if (seg != SEG_DEFAULT
- && ((base && GET_MODE (base) != word_mode)
- || (index && GET_MODE (index) != word_mode)))
- return 0;
-
/* Extract the integral value of scale. */
if (scale_rtx)
{
@@ -12272,6 +12194,45 @@ ix86_legitimize_reload_address (rtx x,
return false;
}
+/* Determine if op is suitable RTX for an address register.
+ Return naked register if a register or a register subreg is
+ found, otherwise return NULL_RTX. */
+
+static rtx
+ix86_validate_address_register (rtx op)
+{
+ enum machine_mode mode = GET_MODE (op);
+
+ /* Only SImode or DImode registers can form the address. */
+ if (mode != SImode && mode != DImode)
+ return NULL_RTX;
+
+ if (REG_P (op))
+ return op;
+ else if (GET_CODE (op) == SUBREG)
+ {
+ rtx reg = SUBREG_REG (op);
+
+ if (!REG_P (reg))
+ return NULL_RTX;
+
+ mode = GET_MODE (reg);
+
+ /* Don't allow SUBREGs that span more than a word. It can
+ lead to spill failures when the register is one word out
+ of a two word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return NULL_RTX;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ if (register_no_elim_operand (reg, mode))
+ return reg;
+ }
+
+ /* Op is not a register. */
+ return NULL_RTX;
+}
+
/* Recognizes RTL expressions that are valid memory addresses for an
instruction. The MODE argument is the machine mode for the MEM
expression that wants to use this address.
@@ -12287,6 +12248,7 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
struct ix86_address parts;
rtx base, index, disp;
HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
if (ix86_decompose_address (addr, &parts) <= 0)
/* Decomposition failed. */
@@ -12296,21 +12258,14 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
index = parts.index;
disp = parts.disp;
scale = parts.scale;
+ seg = parts.seg;
/* Validate base register. */
if (base)
{
- rtx reg;
+ rtx reg = ix86_validate_address_register (base);
- if (REG_P (base))
- reg = base;
- else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
- reg = SUBREG_REG (base);
- else
- /* Base is not a register. */
- return false;
-
- if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
@@ -12322,17 +12277,9 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
/* Validate index register. */
if (index)
{
- rtx reg;
-
- if (REG_P (index))
- reg = index;
- else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
- reg = SUBREG_REG (index);
- else
- /* Index is not a register. */
- return false;
+ rtx reg = ix86_validate_address_register (index);
- if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
@@ -12346,6 +12293,12 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
&& GET_MODE (base) != GET_MODE (index))
return false;
+ /* Address override works only on the (%reg) part of %fs:(%reg). */
+ if (seg != SEG_DEFAULT
+ && ((base && GET_MODE (base) != word_mode)
+ || (index && GET_MODE (index) != word_mode)))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
@@ -12467,6 +12420,12 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
&& !x86_64_immediate_operand (disp, VOIDmode))
/* Displacement is out of range. */
return false;
+ /* In x32 mode, constant addresses are sign extended to 64bit, so
+ we have to prevent addresses from 0x80000000 to 0xffffffff. */
+ else if (TARGET_X32 && !(index || base)
+ && CONST_INT_P (disp)
+ && val_signbit_known_set_p (SImode, INTVAL (disp)))
+ return false;
}
/* Everything looks valid. */
@@ -13831,8 +13790,6 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
Those same assemblers have the same but opposite lossage on cmov. */
if (mode == CCmode)
suffix = fp ? "nbe" : "a";
- else if (mode == CCCmode)
- suffix = "b";
else
gcc_unreachable ();
break;
@@ -13854,8 +13811,12 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
}
break;
case LTU:
- gcc_assert (mode == CCmode || mode == CCCmode);
+ if (mode == CCmode)
suffix = "b";
+ else if (mode == CCCmode)
+ suffix = "c";
+ else
+ gcc_unreachable ();
break;
case GE:
switch (mode)
@@ -13875,20 +13836,20 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
}
break;
case GEU:
- /* ??? As above. */
- gcc_assert (mode == CCmode || mode == CCCmode);
+ if (mode == CCmode)
suffix = fp ? "nb" : "ae";
+ else if (mode == CCCmode)
+ suffix = "nc";
+ else
+ gcc_unreachable ();
break;
case LE:
gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
suffix = "le";
break;
case LEU:
- /* ??? As above. */
if (mode == CCmode)
suffix = "be";
- else if (mode == CCCmode)
- suffix = fp ? "nb" : "ae";
else
gcc_unreachable ();
break;
@@ -15354,7 +15315,7 @@ ix86_avx_u128_mode_needed (rtx insn)
rtx arg = XEXP (XEXP (link, 0), 0);
if (ix86_check_avx256_register (&arg, NULL))
- return AVX_U128_ANY;
+ return AVX_U128_DIRTY;
}
}
@@ -15474,8 +15435,8 @@ ix86_avx_u128_mode_after (int mode, rtx insn)
{
bool avx_reg256_found = false;
note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
- if (!avx_reg256_found)
- return AVX_U128_CLEAN;
+
+ return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
}
/* Otherwise, return current mode. Remember that if insn
@@ -17364,8 +17325,18 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- /* Check it is correct to split here. */
- if (!ix86_ok_to_clobber_flags(insn))
+ /* The "at least two components" test below might not catch simple
+ move or zero extension insns if parts.base is non-NULL and parts.disp
+ is const0_rtx as the only components in the address, e.g. if the
+ register is %rbp or %r13. As this test is much cheaper and moves or
+ zero extensions are the common case, do this check first. */
+ if (REG_P (operands[1])
+ || (SImode_address_operand (operands[1], VOIDmode)
+ && REG_P (XEXP (operands[1], 0))))
+ return false;
+
+ /* Check if it is OK to split here. */
+ if (!ix86_ok_to_clobber_flags (insn))
return false;
ok = ix86_decompose_address (operands[1], &parts);
@@ -18500,11 +18471,6 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
return CCmode;
case GTU: /* CF=0 & ZF=0 */
case LEU: /* CF=1 | ZF=1 */
- /* Detect overflow checks. They need just the carry flag. */
- if (GET_CODE (op0) == MINUS
- && rtx_equal_p (op1, XEXP (op0, 0)))
- return CCCmode;
- else
return CCmode;
/* Codes possibly doable only with sign flag when
comparing against zero. */
@@ -20514,7 +20480,7 @@ ix86_expand_vec_perm (rtx operands[])
return;
case V8SFmode:
- mask = gen_lowpart (V8SFmode, mask);
+ mask = gen_lowpart (V8SImode, mask);
if (one_operand_shuffle)
emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
else
@@ -21754,6 +21720,21 @@ counter_mode (rtx count_exp)
return SImode;
}
+/* Copy the address to a Pmode register. This is used for x32 to
+ truncate DImode TLS address to a SImode register. */
+
+static rtx
+ix86_copy_addr_to_reg (rtx addr)
+{
+ if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
+ return copy_addr_to_reg (addr);
+ else
+ {
+ gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
+ return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
+ }
+}
+
/* When SRCPTR is non-NULL, output simple loop to move memory
pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
@@ -22742,8 +22723,8 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
gcc_assert (alg != no_stringop);
if (!count)
count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
- destreg = copy_addr_to_reg (XEXP (dst, 0));
- srcreg = copy_addr_to_reg (XEXP (src, 0));
+ destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
+ srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
switch (alg)
{
case libcall:
@@ -23133,7 +23114,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
gcc_assert (alg != no_stringop);
if (!count)
count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
- destreg = copy_addr_to_reg (XEXP (dst, 0));
+ destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
switch (alg)
{
case libcall:
@@ -25002,7 +24983,8 @@ ix86_constant_alignment (tree exp, int align)
int
ix86_data_alignment (tree type, int align)
{
- int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+ int max_align
+ = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
if (AGGREGATE_TYPE_P (type)
&& TYPE_SIZE (type)
@@ -27998,8 +27980,8 @@ static const struct builtin_description bdesc_multi_arg[] =
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
@@ -31264,6 +31246,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
tree arg;
rtx pat, op;
unsigned int i, nargs, arg_adjust, memory;
+ bool aligned_mem = false;
struct
{
rtx op;
@@ -31309,6 +31292,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
nargs = 1;
klass = load;
memory = 0;
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_movntdqa:
+ case CODE_FOR_avx2_movntdqa:
+ aligned_mem = true;
+ break;
+ default:
+ break;
+ }
break;
case VOID_FTYPE_PV2SF_V4SF:
case VOID_FTYPE_PV4DI_V4DI:
@@ -31326,6 +31318,26 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
+ switch (icode)
+ {
+ /* These builtins and instructions require the memory
+ to be properly aligned. */
+ case CODE_FOR_avx_movntv4di:
+ case CODE_FOR_sse2_movntv2di:
+ case CODE_FOR_avx_movntv8sf:
+ case CODE_FOR_sse_movntv4sf:
+ case CODE_FOR_sse4a_vmmovntv4sf:
+ case CODE_FOR_avx_movntv4df:
+ case CODE_FOR_sse2_movntv2df:
+ case CODE_FOR_sse4a_vmmovntv2df:
+ case CODE_FOR_sse2_movntidi:
+ case CODE_FOR_sse_movntq:
+ case CODE_FOR_sse2_movntisi:
+ aligned_mem = true;
+ break;
+ default:
+ break;
+ }
break;
case V4SF_FTYPE_V4SF_PCV2SF:
case V2DF_FTYPE_V2DF_PCDOUBLE:
@@ -31382,6 +31394,17 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
target = gen_rtx_MEM (tmode, op);
+ /* target at this point has just BITS_PER_UNIT MEM_ALIGN
+ on it. Try to improve it using get_pointer_alignment,
+ and if the special builtin is one that requires strict
+ mode alignment, also from it's GET_MODE_ALIGNMENT.
+ Failure to do so could lead to ix86_legitimate_combined_insn
+ rejecting all changes to such insns. */
+ unsigned int align = get_pointer_alignment (arg);
+ if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
+ align = GET_MODE_ALIGNMENT (tmode);
+ if (MEM_ALIGN (target) < align)
+ set_mem_align (target, align);
}
else
target = force_reg (tmode, op);
@@ -31427,8 +31450,17 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
/* This must be the memory operand. */
op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
op = gen_rtx_MEM (mode, op);
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
+ /* op at this point has just BITS_PER_UNIT MEM_ALIGN
+ on it. Try to improve it using get_pointer_alignment,
+ and if the special builtin is one that requires strict
+ mode alignment, also from it's GET_MODE_ALIGNMENT.
+ Failure to do so could lead to ix86_legitimate_combined_insn
+ rejecting all changes to such insns. */
+ unsigned int align = get_pointer_alignment (arg);
+ if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
+ align = GET_MODE_ALIGNMENT (mode);
+ if (MEM_ALIGN (op) < align)
+ set_mem_align (op, align);
}
else
{
@@ -32041,7 +32073,8 @@ rdrand_step:
else
op2 = gen_rtx_SUBREG (SImode, op0, 0);
- if (target == 0)
+ if (target == 0
+ || !register_operand (target, SImode))
target = gen_reg_rtx (SImode);
pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
@@ -32083,7 +32116,8 @@ rdseed_step:
const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
- if (target == 0)
+ if (target == 0
+ || !register_operand (target, SImode))
target = gen_reg_rtx (SImode);
emit_insn (gen_zero_extendqisi2 (target, op2));
@@ -32222,7 +32256,9 @@ addcarryx:
mode4 = insn_data[icode].operand[5].mode;
if (target == NULL_RTX
- || GET_MODE (target) != insn_data[icode].operand[0].mode)
+ || GET_MODE (target) != insn_data[icode].operand[0].mode
+ || !insn_data[icode].operand[0].predicate (target,
+ GET_MODE (target)))
subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
else
subtarget = target;
@@ -33893,10 +33929,10 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. OImode move is available only when AVX is
- enabled. */
- return ((TARGET_AVX && mode == OImode)
- || VALID_AVX256_REG_MODE (mode)
+ are available. OImode and AVX modes are available only when
+ AVX is enabled. */
+ return ((TARGET_AVX
+ && VALID_AVX256_REG_OR_OI_MODE (mode))
|| VALID_SSE_REG_MODE (mode)
|| VALID_SSE2_REG_MODE (mode)
|| VALID_MMX_REG_MODE (mode)
@@ -35034,7 +35070,7 @@ x86_output_mi_thunk (FILE *file,
{
tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
tmp = gen_rtx_CONST (Pmode, tmp);
- fnaddr = gen_rtx_MEM (Pmode, tmp);
+ fnaddr = gen_const_mem (Pmode, tmp);
}
}
else
@@ -35054,8 +35090,9 @@ x86_output_mi_thunk (FILE *file,
output_set_got (tmp, NULL_RTX);
fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
- fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
- fnaddr = gen_rtx_MEM (Pmode, fnaddr);
+ fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+ fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
+ fnaddr = gen_const_mem (Pmode, fnaddr);
}
}
@@ -35246,7 +35283,10 @@ ix86_avoid_jump_mispredicts (void)
The smallest offset in the page INSN can start is the case where START
ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
- */
+
+ Don't consider asm goto as jump, while it can contain a jump, it doesn't
+ have to, control transfer to label(s) can be performed through other
+ means, and also we estimate minimum length of all asm stmts as 0. */
for (insn = start; insn; insn = NEXT_INSN (insn))
{
int min_size;
@@ -35274,6 +35314,7 @@ ix86_avoid_jump_mispredicts (void)
{
start = NEXT_INSN (start);
if ((JUMP_P (start)
+ && asm_noperands (PATTERN (start)) < 0
&& GET_CODE (PATTERN (start)) != ADDR_VEC
&& GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
|| CALL_P (start))
@@ -35292,6 +35333,7 @@ ix86_avoid_jump_mispredicts (void)
fprintf (dump_file, "Insn %i estimated to %i bytes\n",
INSN_UID (insn), min_size);
if ((JUMP_P (insn)
+ && asm_noperands (PATTERN (insn)) < 0
&& GET_CODE (PATTERN (insn)) != ADDR_VEC
&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
|| CALL_P (insn))
@@ -35303,6 +35345,7 @@ ix86_avoid_jump_mispredicts (void)
{
start = NEXT_INSN (start);
if ((JUMP_P (start)
+ && asm_noperands (PATTERN (start)) < 0
&& GET_CODE (PATTERN (start)) != ADDR_VEC
&& GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
|| CALL_P (start))
@@ -39385,6 +39428,8 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
else
dfinal.perm[i] = e;
}
+
+ if (!d->testing_p)
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
dfinal.op1 = dfinal.op0;
dfinal.one_operand_p = true;
@@ -39820,6 +39865,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
return false;
gcc_assert (!d->one_operand_p);
+ if (d->testing_p)
+ return true;
+
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
@@ -40019,6 +40067,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
switch (d->vmode)
{
case V4DFmode:
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V4DFmode);
t2 = gen_reg_rtx (V4DFmode);
@@ -40038,6 +40088,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
int mask = odd ? 0xdd : 0x88;
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
t3 = gen_reg_rtx (V8SFmode);
@@ -40079,6 +40131,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
/* We need 2*log2(N)-1 operations to achieve odd/even
with interleave. */
t1 = gen_reg_rtx (V8HImode);
@@ -40100,6 +40154,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V16QImode);
t2 = gen_reg_rtx (V16QImode);
t3 = gen_reg_rtx (V16QImode);
@@ -40132,6 +40188,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_even_odd_1 (&d_copy, odd);
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V4DImode);
t2 = gen_reg_rtx (V4DImode);
@@ -40158,6 +40217,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_even_odd_1 (&d_copy, odd);
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
@@ -40250,6 +40312,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
case V16QImode:
/* These can be implemented via interleave. We save one insn by
stopping once we have promoted to V4SImode and then use pshufd. */
+ if (d->testing_p)
+ return true;
do
{
rtx dest;
@@ -42487,8 +42551,6 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
-#undef TARGET_STRUCT_VALUE_RTX
-#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK