aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/gcc/config/i386/i386.c
diff options
context:
space:
mode:
author Yiran Wang <yiran@google.com> 2015-06-23 15:33:17 -0700
committer Yiran Wang <yiran@google.com> 2015-06-29 10:56:28 -0700
commit 1d9fec7937f45dde5e04cac966a2d9a12f2fc15a (patch)
tree 3fbcd18a379a05fd6d43491a107e1f36bc61b185 /gcc-4.9/gcc/config/i386/i386.c
parent f378ebf14df0952eae870c9865bab8326aa8f137 (diff)
download toolchain_gcc-1d9fec7937f45dde5e04cac966a2d9a12f2fc15a.tar.gz
toolchain_gcc-1d9fec7937f45dde5e04cac966a2d9a12f2fc15a.tar.bz2
toolchain_gcc-1d9fec7937f45dde5e04cac966a2d9a12f2fc15a.zip
Synchronize with google/gcc-4_9 to r224707 (from r214835)
Change-Id: I3d6f06fc613c8f8b6a82143dc44b7338483aac5d
Diffstat (limited to 'gcc-4.9/gcc/config/i386/i386.c')
-rw-r--r-- gcc-4.9/gcc/config/i386/i386.c 349
1 files changed, 251 insertions, 98 deletions
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index a598b8eef..54942d520 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -2465,7 +2465,7 @@ struct ptt
const int align_func;
};
-/* This table must be in sync with enum processor_type in i386.h. */
+/* This table must be in sync with enum processor_type in i386.h. */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
{"generic", &generic_cost, 16, 10, 16, 10, 16},
@@ -3257,14 +3257,14 @@ ix86_option_override_internal (bool main_args_p,
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
- | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
+ | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT | PTA_FSGSBASE},
{"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
- | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
- | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
+ | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
+ | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
| PTA_MOVBE},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
@@ -3334,8 +3334,9 @@ ix86_option_override_internal (bool main_args_p,
/* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
on and OPTION_MASK_ABI_64 is off. We turn off
OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
- -m64. */
- if (TARGET_LP64_P (opts->x_ix86_isa_flags))
+ -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
+ if (TARGET_LP64_P (opts->x_ix86_isa_flags)
+ || TARGET_16BIT_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
}
@@ -3846,11 +3847,30 @@ ix86_option_override_internal (bool main_args_p,
opts->x_target_flags |= MASK_NO_RED_ZONE;
}
+ if (!global_options_set.x_flag_shrink_wrap_frame_pointer)
+ flag_shrink_wrap_frame_pointer = 1;
+
+ /* -fshrink-wrap-frame-pointer is an optimization based on
+ -fno-omit-frame-pointer mode, so it is only effective when
+ flag_omit_frame_pointer is false. It is also disabled when not
+ optimizing and, because frame pointer shrink-wrapping may
+ increase code size, when optimize_size is true. */
+ if (flag_omit_frame_pointer
+ || optimize == 0
+ || optimize_size)
+ flag_shrink_wrap_frame_pointer = 0;
+
+ /* Unless -mno-omit-leaf-frame-pointer is explicitly specified,
+ -fshrink-wrap-frame-pointer enables omitting the leaf frame
+ pointer by default. */
+ if (flag_shrink_wrap_frame_pointer
+ && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags)
+ && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)))
+ opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER;
+
/* Keep nonleaf frame pointers. */
if (opts->x_flag_omit_frame_pointer)
opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
- else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
- opts->x_flag_omit_frame_pointer = 1;
/* If we're doing fast math, we don't care about comparison order
wrt NaNs. This lets us use a shorter comparison sequence. */
@@ -3969,7 +3989,7 @@ ix86_option_override_internal (bool main_args_p,
/* For all chips supporting SSE2, -mfpmath=sse performs better than
fpmath=387. The second is however default at many targets since the
extra 80bit precision of temporaries is considered to be part of ABI.
- Overwrite the default at least for -ffast-math.
+ Overwrite the default at least for -ffast-math.
TODO: -mfpmath=both seems to produce same performing code with bit
smaller binaries. It is however not clear if register allocation is
ready for this setting.
@@ -4291,7 +4311,7 @@ ix86_conditional_register_usage (void)
c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
: TARGET_64BIT ? (1 << 2)
: (1 << 1));
-
+
CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
@@ -4840,9 +4860,9 @@ ix86_valid_target_attribute_p (tree fndecl,
tree old_optimize = build_optimization_node (&global_options);
- /* Get the optimization options of the current function. */
+ /* Get the optimization options of the current function. */
tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
-
+
if (!func_optimize)
func_optimize = old_optimize;
@@ -4850,7 +4870,7 @@ ix86_valid_target_attribute_p (tree fndecl,
memset (&func_options, 0, sizeof (func_options));
init_options_struct (&func_options, NULL);
lang_hooks.init_options_struct (&func_options);
-
+
cl_optimization_restore (&func_options,
TREE_OPTIMIZATION (func_optimize));
@@ -5007,6 +5027,10 @@ ix86_in_large_data_p (tree exp)
if (TREE_CODE (exp) == FUNCTION_DECL)
return false;
+ /* Automatic variables are never large data. */
+ if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
+ return false;
+
if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
{
const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
@@ -5040,8 +5064,7 @@ ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
unsigned HOST_WIDE_INT align)
{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && ix86_in_large_data_p (decl))
+ if (ix86_in_large_data_p (decl))
{
const char *sname = NULL;
unsigned int flags = SECTION_WRITE;
@@ -5127,8 +5150,7 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && ix86_in_large_data_p (decl))
+ if (ix86_in_large_data_p (decl))
{
const char *prefix = NULL;
/* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
@@ -5197,7 +5219,7 @@ x86_elf_aligned_common (FILE *file,
{
if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
&& size > (unsigned int)ix86_section_threshold)
- fputs (".largecomm\t", file);
+ fputs ("\t.largecomm\t", file);
else
fputs (COMMON_ASM_OP, file);
assemble_name (file, name);
@@ -5976,7 +5998,18 @@ ix86_function_type_abi (const_tree fntype)
if (abi == SYSV_ABI)
{
if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
- abi = MS_ABI;
+ {
+ if (TARGET_X32)
+ {
+ static bool warned = false;
+ if (!warned)
+ {
+ error ("X32 does not support ms_abi attribute");
+ warned = true;
+ }
+ }
+ abi = MS_ABI;
+ }
}
else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
abi = SYSV_ABI;
@@ -6212,7 +6245,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
The midde-end can't deal with the vector types > 16 bytes. In this
case, we return the original mode and warn ABI change if CUM isn't
- NULL.
+ NULL.
If INT_RETURN is true, warn ABI change if the vector mode isn't
available for function return value. */
@@ -9083,20 +9116,22 @@ ix86_frame_pointer_required (void)
if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
return true;
- /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
- turns off the frame pointer by default. Turn it back on now if
- we've not got a leaf function. */
- if (TARGET_OMIT_LEAF_FRAME_POINTER
- && (!crtl->is_leaf
- || ix86_current_function_calls_tls_descriptor))
- return true;
-
if (crtl->profile && !flag_fentry)
return true;
return false;
}
+/* Return true if the frame pointer may be omitted for the current
+   function: leaf frame pointer omission must be enabled
+   (TARGET_OMIT_LEAF_FRAME_POINTER), the function must be a leaf, and
+   it must not call a TLS descriptor.  */
+
+static bool
+ix86_can_omit_leaf_frame_pointer (void)
+{
+  if (!TARGET_OMIT_LEAF_FRAME_POINTER)
+    return false;
+
+  if (!crtl->is_leaf)
+    return false;
+
+  return !ix86_current_function_calls_tls_descriptor;
+}
+
/* Record that the current function accesses previous call frames. */
void
@@ -9569,7 +9604,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
offset += UNITS_PER_WORD;
/* Skip saved base pointer. */
- if (frame_pointer_needed)
+ if (frame_pointer_needed || frame_pointer_partially_needed)
offset += UNITS_PER_WORD;
frame->hfp_save_offset = offset;
@@ -10890,6 +10925,26 @@ ix86_expand_prologue (void)
m->fs.fp_valid = true;
}
}
+ else if (frame_pointer_partially_needed)
+ {
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ if (fpset_needed_in_prologue)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ /* Using sp as cfa_reg will involve more .cfi_def_cfa_offset for
+ pushes in prologue, so use fp as cfa_reg to reduce .eh_frame
+ size when possible. */
+ if (!any_fp_def)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ m->fs.fp_offset = m->fs.sp_offset;
+ m->fs.fp_valid = true;
+ }
+ }
+ }
if (!int_registers_saved)
{
@@ -11067,6 +11122,10 @@ ix86_expand_prologue (void)
if (sp_is_cfa_reg)
m->fs.cfa_offset += UNITS_PER_WORD;
RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+ -UNITS_PER_WORD)));
}
}
@@ -11080,6 +11139,10 @@ ix86_expand_prologue (void)
if (sp_is_cfa_reg)
m->fs.cfa_offset += UNITS_PER_WORD;
RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+ -UNITS_PER_WORD)));
}
}
@@ -11231,6 +11294,34 @@ ix86_expand_prologue (void)
emit_insn (gen_prologue_use (stack_pointer_rtx));
}
+/* Emit the insn(s) that set the hard frame pointer from the stack
+   pointer, based on the current frame state, and return the pattern of
+   the last insn emitted.  The frame layout is recomputed here; the
+   value added to the stack pointer is
+   frame.stack_pointer_offset - frame.hard_frame_pointer_offset.
+   Must only be called when frame_pointer_partially_needed is set.  */
+static rtx
+ix86_set_fp_insn (void)
+{
+  rtx r;
+  struct ix86_frame frame;
+  HOST_WIDE_INT offset;
+
+  ix86_compute_frame_layout (&frame);
+  gcc_assert (frame_pointer_partially_needed);
+  offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset;
+
+  if (TARGET_64BIT && (offset > 0x7fffffff))
+    {
+      /* The offset is larger than 0x7fffffff: load it into the frame
+         pointer first, then add the stack pointer to it.  A SET rtx
+         itself carries no mode, so use VOIDmode as the other SETs
+         here do.  */
+      r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, GEN_INT (offset));
+      emit_insn (r);
+      r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx);
+      r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r);
+    }
+  else
+    {
+      r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+      r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r);
+    }
+  emit_insn (r);
+  return r;
+}
+
/* Emit code to restore REG using a POP insn. */
static void
@@ -11415,7 +11506,11 @@ ix86_expand_epilogue (int style)
|| m->fs.sp_offset == frame.stack_pointer_offset);
/* The FP must be valid if the frame pointer is present. */
- gcc_assert (frame_pointer_needed == m->fs.fp_valid);
+ if (!frame_pointer_partially_needed)
+ gcc_assert (frame_pointer_needed == m->fs.fp_valid);
+ else
+ gcc_assert (!(any_fp_def && m->fs.fp_valid));
+
gcc_assert (!m->fs.fp_valid
|| m->fs.fp_offset == frame.hard_frame_pointer_offset);
@@ -11619,7 +11714,7 @@ ix86_expand_epilogue (int style)
/* If we used a stack pointer and haven't already got rid of it,
then do so now. */
- if (m->fs.fp_valid)
+ if (m->fs.fp_valid || frame_pointer_partially_needed)
{
/* If the stack pointer is valid and pointing at the frame
pointer store address, then we only need a pop. */
@@ -11627,15 +11722,20 @@ ix86_expand_epilogue (int style)
ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* Leave results in shorter dependency chains on CPUs that are
able to grok it fast. */
- else if (TARGET_USE_LEAVE
- || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
- || !cfun->machine->use_fast_prologue_epilogue)
+ else if (m->fs.fp_valid
+ && (TARGET_USE_LEAVE
+ || optimize_function_for_size_p (cfun)
+ || !cfun->machine->use_fast_prologue_epilogue))
ix86_emit_leave ();
else
{
+ rtx dest, offset;
+ dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx;
+ offset = (m->fs.fp_valid) ? const0_rtx :
+ GEN_INT (m->fs.sp_offset - frame.hfp_save_offset);
pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- const0_rtx, style, !using_drap);
+ dest,
+ offset, style, !using_drap);
ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
}
}
@@ -11947,7 +12047,7 @@ ix86_output_function_nops_prologue_epilogue (FILE *file,
fprintf (file, "\n");
/* Switching back to text section. */
- switch_to_section (function_section (current_function_decl));
+ switch_to_section (current_function_section ());
return true;
}
@@ -12379,7 +12479,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
addr = XEXP (addr, 0);
if (CONST_INT_P (addr))
return 0;
- }
+ }
else if (GET_CODE (addr) == AND
&& const_32bit_mask (XEXP (addr, 1), DImode))
{
@@ -12905,8 +13005,16 @@ legitimate_pic_address_disp_p (rtx disp)
return true;
}
else if (!SYMBOL_REF_FAR_ADDR_P (op0)
- && (SYMBOL_REF_LOCAL_P (op0)
- || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie
+ && (SYMBOL_REF_LOCAL_P (op0)
+ || (HAVE_LD_PIE_COPYRELOC
+ && flag_pie
+ && !(SYMBOL_REF_WEAK (op0)
+ /* TODO: Temporary fix for weak defined symbols. Weak defined
+ symbols in an executable cannot be overridden even with
+ a non-weak symbol in a shared library.
+ Revert after the fix is checked in here:
+ http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html */
+ && SYMBOL_REF_EXTERNAL_P (op0))
&& !SYMBOL_REF_FUNCTION_P (op0)))
&& ix86_cmodel != CM_LARGE_PIC)
return true;
@@ -13010,7 +13118,7 @@ ix86_legitimize_reload_address (rtx x,
(reg:DI 2 cx))
This RTX is rejected from ix86_legitimate_address_p due to
- non-strictness of base register 97. Following this rejection,
+ non-strictness of base register 97. Following this rejection,
reload pushes all three components into separate registers,
creating invalid memory address RTX.
@@ -13025,7 +13133,7 @@ ix86_legitimize_reload_address (rtx x,
rtx base, index;
bool something_reloaded = false;
- base = XEXP (XEXP (x, 0), 1);
+ base = XEXP (XEXP (x, 0), 1);
if (!REG_OK_FOR_BASE_STRICT_P (base))
{
push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
@@ -13929,7 +14037,7 @@ get_dllimport_decl (tree decl, bool beimport)
#ifdef SUB_TARGET_RECORD_STUB
SUB_TARGET_RECORD_STUB (name);
#endif
- }
+ }
rtl = gen_const_mem (Pmode, rtl);
set_mem_alias_set (rtl, ix86_GOT_alias_set ());
@@ -13976,7 +14084,7 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg)
return x;
}
-/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
+/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
is true if we require the result be a register. */
static rtx
@@ -14749,7 +14857,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
if (mode == CCmode)
suffix = "b";
else if (mode == CCCmode)
- suffix = "c";
+ suffix = fp ? "b" : "c";
else
gcc_unreachable ();
break;
@@ -14772,9 +14880,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
break;
case GEU:
if (mode == CCmode)
- suffix = fp ? "nb" : "ae";
+ suffix = "nb";
else if (mode == CCCmode)
- suffix = "nc";
+ suffix = fp ? "nb" : "nc";
else
gcc_unreachable ();
break;
@@ -15109,7 +15217,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
case 2:
putc ('w', file);
break;
-
+
case 4:
putc ('l', file);
break;
@@ -16408,7 +16516,7 @@ ix86_mode_needed (int entity, rtx insn)
}
/* Check if a 256bit AVX register is referenced in stores. */
-
+
static void
ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
{
@@ -16417,7 +16525,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
bool *used = (bool *) data;
*used = true;
}
- }
+ }
/* Calculate mode of upper 128bit AVX registers after the insn. */
@@ -17463,7 +17571,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
t = gen_reg_rtx (V4SFmode);
else
t = op0;
-
+
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
emit_move_insn (t, CONST0_RTX (V4SFmode));
else
@@ -18527,7 +18635,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-
+
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
@@ -21852,7 +21960,7 @@ ix86_expand_vec_perm (rtx operands[])
if (TARGET_XOP)
{
- /* The XOP VPPERM insn supports three inputs. By ignoring the
+ /* The XOP VPPERM insn supports three inputs. By ignoring the
one_operand_shuffle special case, we avoid creating another
set of constant vectors in memory. */
one_operand_shuffle = false;
@@ -23708,7 +23816,7 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
DONE_LABEL is a label after the whole copying sequence. The label is created
on demand if *DONE_LABEL is NULL.
MIN_SIZE is minimal size of block copied. This value gets adjusted for new
- bounds after the initial copies.
+ bounds after the initial copies.
DESTMEM/SRCMEM are memory expressions pointing to the copies block,
DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
@@ -24013,7 +24121,7 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
return dst;
}
-/* Return true if ALG can be used in current context.
+/* Return true if ALG can be used in current context.
Assume we expand memset if MEMSET is true. */
static bool
alg_usable_p (enum stringop_alg alg, bool memset)
@@ -24136,7 +24244,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*noalign = alg_noalign;
return alg;
}
- break;
+ else if (!any_alg_usable_p)
+ break;
}
else if (alg_usable_p (candidate, memset))
{
@@ -24174,9 +24283,10 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
alg = decide_alg (count, max / 2, min_size, max_size, memset,
zero_memset, dynamic_check, noalign);
gcc_assert (*dynamic_check == -1);
- gcc_assert (alg != libcall);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
+ else
+ gcc_assert (alg != libcall);
return alg;
}
return (alg_usable_p (algs->unknown_size, memset)
@@ -24336,7 +24446,7 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
with specified algorithm.
4) Epilogue: code copying tail of the block that is too small to be
- handled by main body (or up to size guarded by prologue guard).
+ handled by main body (or up to size guarded by prologue guard).
Misaligned move sequence
@@ -24531,7 +24641,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
/* Do the cheap promotion to allow better CSE across the
main loop and epilogue (ie one load of the big constant in the
- front of all code.
+ front of all code.
For now the misaligned move sequences do not have fast path
without broadcasting. */
if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
@@ -25103,13 +25213,19 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
}
else
{
- /* Static functions and indirect calls don't need the pic register. */
+ /* Static functions and indirect calls don't need the pic register. Also,
+ check whether the PLT was explicitly avoided via -fno-plt or the "noplt"
+ attribute, which makes the call indirect. */
if (flag_pic
&& (!TARGET_64BIT
|| (ix86_cmodel == CM_LARGE_PIC
&& DEFAULT_ABI != MS_ABI))
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
- && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+ && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
+ && flag_plt
+ && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
+ || !lookup_attribute ("noplt",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
use_reg (&use, pic_offset_table_rtx);
}
@@ -25173,6 +25289,31 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
return call;
}
+/* Decide whether a call to CALL_OP must bypass the PLT in non-PIC
+   64-bit code.  Return true when the callee is not a local symbol and
+   either -fno-plt is in effect (flag_plt is clear) or the callee's decl
+   carries the "noplt" attribute.  Return false for PIC code, the large
+   code model, non-64-bit targets, Mach-O, SEH and PE/COFF targets, and
+   local symbols.  The non-PIC case is handled in the backend because
+   there is no easy interface for the front end to force non-PLT calls
+   to go through the GOT.  */
+
+static bool
+ix86_nopic_noplt_attribute_p (rtx call_op)
+{
+  tree callee_decl;
+
+  /* Not applicable in these configurations.  */
+  if (flag_pic
+      || ix86_cmodel == CM_LARGE
+      || !TARGET_64BIT
+      || TARGET_MACHO
+      || TARGET_SEH
+      || TARGET_PECOFF)
+    return false;
+
+  if (SYMBOL_REF_LOCAL_P (call_op))
+    return false;
+
+  callee_decl = SYMBOL_REF_DECL (call_op);
+
+  /* With -fno-plt every remaining callee qualifies.  */
+  if (!flag_plt)
+    return true;
+
+  return (callee_decl != NULL_TREE
+	  && lookup_attribute ("noplt", DECL_ATTRIBUTES (callee_decl)));
+}
+
/* Output the assembly for a call instruction. */
const char *
@@ -25184,7 +25325,9 @@ ix86_output_call_insn (rtx insn, rtx call_op)
if (SIBLING_CALL_P (insn))
{
- if (direct_p)
+ if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
+ xasm = "jmp\t*%p0@GOTPCREL(%%rip)";
+ else if (direct_p)
xasm = "jmp\t%P0";
/* SEH epilogue detection requires the indirect branch case
to include REX.W. */
@@ -25236,7 +25379,9 @@ ix86_output_call_insn (rtx insn, rtx call_op)
seh_nop_p = true;
}
- if (direct_p)
+ if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
+ xasm = "call\t*%p0@GOTPCREL(%%rip)";
+ else if (direct_p)
xasm = "call\t%P0";
else
xasm = "call\t%A0";
@@ -26506,7 +26651,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail)
using topological ordering in the region. */
if (rgn == CONTAINING_RGN (e->src->index)
&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
- add_dependee_for_func_arg (first_arg, e->src);
+ add_dependee_for_func_arg (first_arg, e->src);
}
}
insn = first_arg;
@@ -26974,7 +27119,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
other unit can not rely on the alignment.
Exclude va_list type. It is the common case of local array where
- we can not benefit from the alignment.
+ we can not benefit from the alignment.
TODO: Probably one should optimize for size only when var is not escaping. */
if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
@@ -31443,7 +31588,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
build_fold_addr_expr (version_decl));
result_var = create_tmp_var (ptr_type_node, NULL);
- convert_stmt = gimple_build_assign (result_var, convert_expr);
+ convert_stmt = gimple_build_assign (result_var, convert_expr);
return_stmt = gimple_build_return (result_var);
if (predicate_chain == NULL_TREE)
@@ -31470,7 +31615,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
gimple_seq_add_stmt (&gseq, call_cond_stmt);
predicate_chain = TREE_CHAIN (predicate_chain);
-
+
if (and_expr_var == NULL)
and_expr_var = cond_var;
else
@@ -31511,7 +31656,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
gimple_set_bb (return_stmt, bb2);
bb3 = e23->dest;
- make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+ make_edge (bb1, bb3, EDGE_FALSE_VALUE);
remove_edge (e23);
make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
@@ -31563,7 +31708,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
P_FMA4,
P_XOP,
P_PROC_XOP,
- P_FMA,
+ P_FMA,
P_PROC_FMA,
P_AVX2,
P_PROC_AVX2
@@ -31628,11 +31773,11 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
cl_target_option_save (&cur_target, &global_options);
target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
&global_options_set);
-
+
gcc_assert (target_node);
new_target = TREE_TARGET_OPTION (target_node);
gcc_assert (new_target);
-
+
if (new_target->arch_specified && new_target->arch > 0)
{
switch (new_target->arch)
@@ -31701,18 +31846,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
arg_str = "bdver4";
priority = P_PROC_AVX2;
break;
- }
- }
-
+ }
+ }
+
cl_target_option_restore (&global_options, &cur_target);
-
+
if (predicate_list && arg_str == NULL)
{
error_at (DECL_SOURCE_LOCATION (decl),
"No dispatcher found for the versioning attributes");
return 0;
}
-
+
if (predicate_list)
{
predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
@@ -31779,7 +31924,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
*predicate_list = predicate_chain;
}
- return priority;
+ return priority;
}
/* This compares the priority of target features in function DECL1
@@ -31798,7 +31943,7 @@ ix86_compare_version_priority (tree decl1, tree decl2)
/* V1 and V2 point to function versions with different priorities
based on the target ISA. This function compares their priorities. */
-
+
static int
feature_compare (const void *v1, const void *v2)
{
@@ -32111,12 +32256,12 @@ ix86_function_versions (tree fn1, tree fn2)
result = true;
XDELETEVEC (target1);
- XDELETEVEC (target2);
-
+ XDELETEVEC (target2);
+
return result;
}
-static tree
+static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
/* For function version, add the target suffix to the assembler name. */
@@ -32186,7 +32331,7 @@ make_dispatcher_decl (const tree decl)
fn_type = TREE_TYPE (decl);
func_type = build_function_type (TREE_TYPE (fn_type),
TYPE_ARG_TYPES (fn_type));
-
+
func_decl = build_fn_decl (func_name, func_type);
XDELETEVEC (func_name);
TREE_USED (func_decl) = 1;
@@ -32199,7 +32344,7 @@ make_dispatcher_decl (const tree decl)
/* This will be of type IFUNCs have to be externally visible. */
TREE_PUBLIC (func_decl) = 1;
- return func_decl;
+ return func_decl;
}
#endif
@@ -32236,7 +32381,7 @@ ix86_get_function_versions_dispatcher (void *decl)
tree dispatch_decl = NULL;
struct cgraph_function_version_info *default_version_info = NULL;
-
+
gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
node = cgraph_get_node (fn);
@@ -32244,7 +32389,7 @@ ix86_get_function_versions_dispatcher (void *decl)
node_v = get_cgraph_node_version (node);
gcc_assert (node_v != NULL);
-
+
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
@@ -32409,7 +32554,7 @@ make_resolver_func (const tree default_decl,
gcc_assert (dispatch_decl != NULL);
/* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
- DECL_ATTRIBUTES (dispatch_decl)
+ DECL_ATTRIBUTES (dispatch_decl)
= make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
/* Create the alias for dispatch to resolver here. */
@@ -32424,7 +32569,7 @@ make_resolver_func (const tree default_decl,
provide the code to dispatch the right function at run-time. NODE points
to the dispatcher decl whose body will be created. */
-static tree
+static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
tree resolver_decl;
@@ -32476,7 +32621,7 @@ ix86_generate_version_dispatcher_body (void *node_p)
}
dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
- rebuild_cgraph_edges ();
+ rebuild_cgraph_edges ();
pop_cfun ();
return resolver_decl;
}
@@ -32587,7 +32732,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_AMDFAM15H,
M_INTEL_SILVERMONT,
M_AMD_BTVER1,
- M_AMD_BTVER2,
+ M_AMD_BTVER2,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
@@ -32627,13 +32772,13 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"barcelona", M_AMDFAM10H_BARCELONA},
{"shanghai", M_AMDFAM10H_SHANGHAI},
{"istanbul", M_AMDFAM10H_ISTANBUL},
- {"btver1", M_AMD_BTVER1},
+ {"btver1", M_AMD_BTVER1},
{"amdfam15h", M_AMDFAM15H},
{"bdver1", M_AMDFAM15H_BDVER1},
{"bdver2", M_AMDFAM15H_BDVER2},
{"bdver3", M_AMDFAM15H_BDVER3},
{"bdver4", M_AMDFAM15H_BDVER4},
- {"btver2", M_AMD_BTVER2},
+ {"btver2", M_AMD_BTVER2},
};
static struct _isa_names_table
@@ -35238,9 +35383,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
{
/* Make it call __cpu_indicator_init in libgcc. */
tree call_expr, fndecl, type;
- type = build_function_type_list (integer_type_node, NULL_TREE);
+ type = build_function_type_list (integer_type_node, NULL_TREE);
fndecl = build_fn_decl ("__cpu_indicator_init", type);
- call_expr = build_call_expr (fndecl, 0);
+ call_expr = build_call_expr (fndecl, 0);
return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
}
case IX86_BUILTIN_CPU_IS:
@@ -41332,8 +41477,8 @@ ix86_encode_section_info (tree decl, rtx rtl, int first)
{
default_encode_section_info (decl, rtl, first);
- if (TREE_CODE (decl) == VAR_DECL
- && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl))
+ || TREE_CODE(decl) == STRING_CST)
&& ix86_in_large_data_p (decl))
SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
@@ -42957,8 +43102,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
op0 = gen_lowpart (V4DImode, d->op0);
op1 = gen_lowpart (V4DImode, d->op1);
rperm[0]
- = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
- || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
+ = GEN_INT ((d->perm[0] / (nelt / 2))
+ | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
if (target != d->target)
emit_move_insn (d->target, gen_lowpart (d->vmode, target));
@@ -47277,6 +47422,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2)
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
+#undef TARGET_SET_FP_INSN
+#define TARGET_SET_FP_INSN ix86_set_fp_insn
+
#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
@@ -47562,6 +47710,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2)
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
+#undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER
+#define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
@@ -47601,6 +47752,8 @@ adjacent_mem_locations (rtx mem1, rtx mem2)
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
ix86_float_exceptions_rounding_supported_p
+#undef TARGET_STRICT_ALIGN
+#define TARGET_STRICT_ALIGN true
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"