diff options
Diffstat (limited to 'gcc-4.9/gcc/config/i386/i386.c')
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.c | 349 |
1 files changed, 251 insertions, 98 deletions
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index a598b8eef..54942d520 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -2465,7 +2465,7 @@ struct ptt const int align_func; }; -/* This table must be in sync with enum processor_type in i386.h. */ +/* This table must be in sync with enum processor_type in i386.h. */ static const struct ptt processor_target_table[PROCESSOR_max] = { {"generic", &generic_cost, 16, 10, 16, 10, 16}, @@ -3257,14 +3257,14 @@ ix86_option_override_internal (bool main_args_p, | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C - | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE + | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 - | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 - | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, @@ -3334,8 +3334,9 @@ ix86_option_override_internal (bool main_args_p, /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is on and OPTION_MASK_ABI_64 is off. We turn off OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by - -m64. */ - if (TARGET_LP64_P (opts->x_ix86_isa_flags)) + -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ + if (TARGET_LP64_P (opts->x_ix86_isa_flags) + || TARGET_16BIT_P (opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; #endif } @@ -3846,11 +3847,30 @@ ix86_option_override_internal (bool main_args_p, opts->x_target_flags |= MASK_NO_RED_ZONE; } + if (!global_options_set.x_flag_shrink_wrap_frame_pointer) + flag_shrink_wrap_frame_pointer = 1; + + /* -fshrink-wrap-frame-pointer is an optimization based on + -fno-omit-frame-pointer mode, so it is only effective when + flag_omit_frame_pointer is false. + Frame pointer shrinkwrap may increase code size, so disable + it when optimize_size is true. */ + if (flag_omit_frame_pointer + || optimize == 0 + || optimize_size) + flag_shrink_wrap_frame_pointer = 0; + + /* If only no -mno-omit-leaf-frame-pointer is explicitly specified, + -fshrink_wrap_frame_pointer will enable omitting leaf frame + pointer by default. */ + if (flag_shrink_wrap_frame_pointer + && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags) + && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))) + opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER; + /* Keep nonleaf frame pointers. */ if (opts->x_flag_omit_frame_pointer) opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; - else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) - opts->x_flag_omit_frame_pointer = 1; /* If we're doing fast math, we don't care about comparison order wrt NaNs. This lets us use a shorter comparison sequence. */ @@ -3969,7 +3989,7 @@ ix86_option_override_internal (bool main_args_p, /* For all chips supporting SSE2, -mfpmath=sse performs better than fpmath=387. The second is however default at many targets since the extra 80bit precision of temporaries is considered to be part of ABI. - Overwrite the default at least for -ffast-math. + Overwrite the default at least for -ffast-math. TODO: -mfpmath=both seems to produce same performing code with bit smaller binaries. It is however not clear if register allocation is ready for this setting. @@ -4291,7 +4311,7 @@ ix86_conditional_register_usage (void) c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3) : TARGET_64BIT ? (1 << 2) : (1 << 1)); - + CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) @@ -4840,9 +4860,9 @@ ix86_valid_target_attribute_p (tree fndecl, tree old_optimize = build_optimization_node (&global_options); - /* Get the optimization options of the current function. */ + /* Get the optimization options of the current function. */ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); - + if (!func_optimize) func_optimize = old_optimize; @@ -4850,7 +4870,7 @@ ix86_valid_target_attribute_p (tree fndecl, memset (&func_options, 0, sizeof (func_options)); init_options_struct (&func_options, NULL); lang_hooks.init_options_struct (&func_options); - + cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize)); @@ -5007,6 +5027,10 @@ ix86_in_large_data_p (tree exp) if (TREE_CODE (exp) == FUNCTION_DECL) return false; + /* Automatic variables are never large data. */ + if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp)) + return false; + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) { const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); @@ -5040,8 +5064,7 @@ ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *sname = NULL; unsigned int flags = SECTION_WRITE; @@ -5127,8 +5150,7 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) static void ATTRIBUTE_UNUSED x86_64_elf_unique_section (tree decl, int reloc) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *prefix = NULL; /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ @@ -5197,7 +5219,7 @@ x86_elf_aligned_common (FILE *file, { if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) && size > (unsigned int)ix86_section_threshold) - fputs (".largecomm\t", file); + fputs ("\t.largecomm\t", file); else fputs (COMMON_ASM_OP, file); assemble_name (file, name); @@ -5976,7 +5998,18 @@ ix86_function_type_abi (const_tree fntype) if (abi == SYSV_ABI) { if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - abi = MS_ABI; + { + if (TARGET_X32) + { + static bool warned = false; + if (!warned) + { + error ("X32 does not support ms_abi attribute"); + warned = true; + } + } + abi = MS_ABI; + } } else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) abi = SYSV_ABI; @@ -6212,7 +6245,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ The midde-end can't deal with the vector types > 16 bytes. In this case, we return the original mode and warn ABI change if CUM isn't - NULL. + NULL. If INT_RETURN is true, warn ABI change if the vector mode isn't available for function return value. */ @@ -9083,20 +9116,22 @@ ix86_frame_pointer_required (void) if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) return true; - /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER - turns off the frame pointer by default. Turn it back on now if - we've not got a leaf function. */ - if (TARGET_OMIT_LEAF_FRAME_POINTER - && (!crtl->is_leaf - || ix86_current_function_calls_tls_descriptor)) - return true; - if (crtl->profile && !flag_fentry) return true; return false; } +/* Return true if the frame pointer of the function could be omitted. */ + +static bool +ix86_can_omit_leaf_frame_pointer (void) +{ + return TARGET_OMIT_LEAF_FRAME_POINTER + && (crtl->is_leaf + && !ix86_current_function_calls_tls_descriptor); +} + /* Record that the current function accesses previous call frames. */ void @@ -9569,7 +9604,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) offset += UNITS_PER_WORD; /* Skip saved base pointer. */ - if (frame_pointer_needed) + if (frame_pointer_needed || frame_pointer_partially_needed) offset += UNITS_PER_WORD; frame->hfp_save_offset = offset; @@ -10890,6 +10925,26 @@ ix86_expand_prologue (void) m->fs.fp_valid = true; } } + else if (frame_pointer_partially_needed) + { + insn = emit_insn (gen_push (hard_frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + if (fpset_needed_in_prologue) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + /* Using sp as cfa_reg will involve more .cfi_def_cfa_offset for + pushes in prologue, so use fp as cfa_reg to reduce .eh_frame + size when possible. */ + if (!any_fp_def) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = m->fs.sp_offset; + m->fs.fp_valid = true; + } + } + } if (!int_registers_saved) { @@ -11067,6 +11122,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11080,6 +11139,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11231,6 +11294,34 @@ ix86_expand_prologue (void) emit_insn (gen_prologue_use (stack_pointer_rtx)); } +/* Get frame pointer setting insn based on frame state. */ +static rtx +ix86_set_fp_insn () +{ + rtx r, seq; + struct ix86_frame frame; + HOST_WIDE_INT offset; + + ix86_compute_frame_layout (&frame); + gcc_assert (frame_pointer_partially_needed); + offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset; + + if (TARGET_64BIT && (offset > 0x7fffffff)) + { + r = gen_rtx_SET (DImode, hard_frame_pointer_rtx, GEN_INT (offset)); + emit_insn (r); + r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + else + { + r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + emit_insn (r); + return r; +} + /* Emit code to restore REG using a POP insn. */ static void @@ -11415,7 +11506,11 @@ ix86_expand_epilogue (int style) || m->fs.sp_offset == frame.stack_pointer_offset); /* The FP must be valid if the frame pointer is present. */ - gcc_assert (frame_pointer_needed == m->fs.fp_valid); + if (!frame_pointer_partially_needed) + gcc_assert (frame_pointer_needed == m->fs.fp_valid); + else + gcc_assert (!(any_fp_def && m->fs.fp_valid)); + gcc_assert (!m->fs.fp_valid || m->fs.fp_offset == frame.hard_frame_pointer_offset); @@ -11619,7 +11714,7 @@ ix86_expand_epilogue (int style) /* If we used a stack pointer and haven't already got rid of it, then do so now. */ - if (m->fs.fp_valid) + if (m->fs.fp_valid || frame_pointer_partially_needed) { /* If the stack pointer is valid and pointing at the frame pointer store address, then we only need a pop. */ @@ -11627,15 +11722,20 @@ ix86_expand_epilogue (int style) ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); /* Leave results in shorter dependency chains on CPUs that are able to grok it fast. */ - else if (TARGET_USE_LEAVE - || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) - || !cfun->machine->use_fast_prologue_epilogue) + else if (m->fs.fp_valid + && (TARGET_USE_LEAVE + || optimize_function_for_size_p (cfun) + || !cfun->machine->use_fast_prologue_epilogue)) ix86_emit_leave (); else { + rtx dest, offset; + dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx; + offset = (m->fs.fp_valid) ? const0_rtx : + GEN_INT (m->fs.sp_offset - frame.hfp_save_offset); pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - const0_rtx, style, !using_drap); + dest, + offset, style, !using_drap); ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); } } @@ -11947,7 +12047,7 @@ ix86_output_function_nops_prologue_epilogue (FILE *file, fprintf (file, "\n"); /* Switching back to text section. */ - switch_to_section (function_section (current_function_decl)); + switch_to_section (current_function_section ()); return true; } @@ -12379,7 +12479,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) addr = XEXP (addr, 0); if (CONST_INT_P (addr)) return 0; - } + } else if (GET_CODE (addr) == AND && const_32bit_mask (XEXP (addr, 1), DImode)) { @@ -12905,8 +13005,16 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && (SYMBOL_REF_LOCAL_P (op0) - || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie + && (SYMBOL_REF_LOCAL_P (op0) + || (HAVE_LD_PIE_COPYRELOC + && flag_pie + && !(SYMBOL_REF_WEAK (op0) + /* TODO:Temporary fix for weak defined symbols. Weak defined + symbols in an executable cannot be overridden even with + a non-weak symbol in a shared library. + Revert after fix is checked in here: + http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html*/ + && SYMBOL_REF_EXTERNAL_P (op0)) && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; @@ -13010,7 +13118,7 @@ ix86_legitimize_reload_address (rtx x, (reg:DI 2 cx)) This RTX is rejected from ix86_legitimate_address_p due to - non-strictness of base register 97. Following this rejection, + non-strictness of base register 97. Following this rejection, reload pushes all three components into separate registers, creating invalid memory address RTX. @@ -13025,7 +13133,7 @@ ix86_legitimize_reload_address (rtx x, rtx base, index; bool something_reloaded = false; - base = XEXP (XEXP (x, 0), 1); + base = XEXP (XEXP (x, 0), 1); if (!REG_OK_FOR_BASE_STRICT_P (base)) { push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL, @@ -13929,7 +14037,7 @@ get_dllimport_decl (tree decl, bool beimport) #ifdef SUB_TARGET_RECORD_STUB SUB_TARGET_RECORD_STUB (name); #endif - } + } rtl = gen_const_mem (Pmode, rtl); set_mem_alias_set (rtl, ix86_GOT_alias_set ()); @@ -13976,7 +14084,7 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg) return x; } -/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG +/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG is true if we require the result be a register. */ static rtx @@ -14749,7 +14857,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, if (mode == CCmode) suffix = "b"; else if (mode == CCCmode) - suffix = "c"; + suffix = fp ? "b" : "c"; else gcc_unreachable (); break; @@ -14772,9 +14880,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, break; case GEU: if (mode == CCmode) - suffix = fp ? "nb" : "ae"; + suffix = "nb"; else if (mode == CCCmode) - suffix = "nc"; + suffix = fp ? "nb" : "nc"; else gcc_unreachable (); break; @@ -15109,7 +15217,7 @@ ix86_print_operand (FILE *file, rtx x, int code) case 2: putc ('w', file); break; - + case 4: putc ('l', file); break; @@ -16408,7 +16516,7 @@ ix86_mode_needed (int entity, rtx insn) } /* Check if a 256bit AVX register is referenced in stores. */ - + static void ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) { @@ -16417,7 +16525,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) bool *used = (bool *) data; *used = true; } - } + } /* Calculate mode of upper 128bit AVX registers after the insn. */ @@ -17463,7 +17571,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) t = gen_reg_rtx (V4SFmode); else t = op0; - + if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) emit_move_insn (t, CONST0_RTX (V4SFmode)); else @@ -18527,7 +18635,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode, op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src)); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); } @@ -21852,7 +21960,7 @@ ix86_expand_vec_perm (rtx operands[]) if (TARGET_XOP) { - /* The XOP VPPERM insn supports three inputs. By ignoring the + /* The XOP VPPERM insn supports three inputs. By ignoring the one_operand_shuffle special case, we avoid creating another set of constant vectors in memory. */ one_operand_shuffle = false; @@ -23708,7 +23816,7 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, DONE_LABEL is a label after the whole copying sequence. The label is created on demand if *DONE_LABEL is NULL. MIN_SIZE is minimal size of block copied. This value gets adjusted for new - bounds after the initial copies. + bounds after the initial copies. DESTMEM/SRCMEM are memory expressions pointing to the copies block, DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether @@ -24013,7 +24121,7 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, return dst; } -/* Return true if ALG can be used in current context. +/* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool alg_usable_p (enum stringop_alg alg, bool memset) @@ -24136,7 +24244,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *noalign = alg_noalign; return alg; } - break; + else if (!any_alg_usable_p) + break; } else if (alg_usable_p (candidate, memset)) { @@ -24174,9 +24283,10 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); gcc_assert (*dynamic_check == -1); - gcc_assert (alg != libcall); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; + else + gcc_assert (alg != libcall); return alg; } return (alg_usable_p (algs->unknown_size, memset) @@ -24336,7 +24446,7 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, with specified algorithm. 4) Epilogue: code copying tail of the block that is too small to be - handled by main body (or up to size guarded by prologue guard). + handled by main body (or up to size guarded by prologue guard). Misaligned move sequence @@ -24531,7 +24641,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, /* Do the cheap promotion to allow better CSE across the main loop and epilogue (ie one load of the big constant in the - front of all code. + front of all code. For now the misaligned move sequences do not have fast path without broadcasting. */ if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) @@ -25103,13 +25213,19 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, } else { - /* Static functions and indirect calls don't need the pic register. */ + /* Static functions and indirect calls don't need the pic register. Also, + check if PLT was explicitly avoided via no-plt or "noplt" attribute, making + it an indirect call. */ if (flag_pic && (!TARGET_64BIT || (ix86_cmodel == CM_LARGE_PIC && DEFAULT_ABI != MS_ABI)) && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) + && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)) + && flag_plt + && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE + || !lookup_attribute ("noplt", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0)))))) use_reg (&use, pic_offset_table_rtx); } @@ -25173,6 +25289,31 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, return call; } +/* Return true if the function being called was marked with attribute "noplt" + or using -fno-plt and we are compiling for non-PIC and x86_64. We need to + handle the non-PIC case in the backend because there is no easy interface + for the front-end to force non-PLT calls to use the GOT. This is currently + used only with 64-bit ELF targets to call the function marked "noplt" + indirectly. */ + +static bool +ix86_nopic_noplt_attribute_p (rtx call_op) +{ + if (flag_pic || ix86_cmodel == CM_LARGE + || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF + || SYMBOL_REF_LOCAL_P (call_op)) + return false; + + tree symbol_decl = SYMBOL_REF_DECL (call_op); + + if (!flag_plt + || (symbol_decl != NULL_TREE + && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) + return true; + + return false; +} + /* Output the assembly for a call instruction. */ const char * @@ -25184,7 +25325,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) if (SIBLING_CALL_P (insn)) { - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "jmp\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "jmp\t%P0"; /* SEH epilogue detection requires the indirect branch case to include REX.W. */ @@ -25236,7 +25379,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) seh_nop_p = true; } - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "call\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "call\t%P0"; else xasm = "call\t%A0"; @@ -26506,7 +26651,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) using topological ordering in the region. */ if (rgn == CONTAINING_RGN (e->src->index) && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) - add_dependee_for_func_arg (first_arg, e->src); + add_dependee_for_func_arg (first_arg, e->src); } } insn = first_arg; @@ -26974,7 +27119,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode, other unit can not rely on the alignment. Exclude va_list type. It is the common case of local array where - we can not benefit from the alignment. + we can not benefit from the alignment. TODO: Probably one should optimize for size only when var is not escaping. */ if (TARGET_64BIT && optimize_function_for_speed_p (cfun) @@ -31443,7 +31588,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, convert_expr = build1 (CONVERT_EXPR, ptr_type_node, build_fold_addr_expr (version_decl)); result_var = create_tmp_var (ptr_type_node, NULL); - convert_stmt = gimple_build_assign (result_var, convert_expr); + convert_stmt = gimple_build_assign (result_var, convert_expr); return_stmt = gimple_build_return (result_var); if (predicate_chain == NULL_TREE) @@ -31470,7 +31615,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_seq_add_stmt (&gseq, call_cond_stmt); predicate_chain = TREE_CHAIN (predicate_chain); - + if (and_expr_var == NULL) and_expr_var = cond_var; else @@ -31511,7 +31656,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_set_bb (return_stmt, bb2); bb3 = e23->dest; - make_edge (bb1, bb3, EDGE_FALSE_VALUE); + make_edge (bb1, bb3, EDGE_FALSE_VALUE); remove_edge (e23); make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -31563,7 +31708,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_FMA4, P_XOP, P_PROC_XOP, - P_FMA, + P_FMA, P_PROC_FMA, P_AVX2, P_PROC_AVX2 @@ -31628,11 +31773,11 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) cl_target_option_save (&cur_target, &global_options); target_node = ix86_valid_target_attribute_tree (attrs, &global_options, &global_options_set); - + gcc_assert (target_node); new_target = TREE_TARGET_OPTION (target_node); gcc_assert (new_target); - + if (new_target->arch_specified && new_target->arch > 0) { switch (new_target->arch) @@ -31701,18 +31846,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "bdver4"; priority = P_PROC_AVX2; break; - } - } - + } + } + cl_target_option_restore (&global_options, &cur_target); - + if (predicate_list && arg_str == NULL) { error_at (DECL_SOURCE_LOCATION (decl), "No dispatcher found for the versioning attributes"); return 0; } - + if (predicate_list) { predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; @@ -31779,7 +31924,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) *predicate_list = predicate_chain; } - return priority; + return priority; } /* This compares the priority of target features in function DECL1 @@ -31798,7 +31943,7 @@ ix86_compare_version_priority (tree decl1, tree decl2) /* V1 and V2 point to function versions with different priorities based on the target ISA. This function compares their priorities. */ - + static int feature_compare (const void *v1, const void *v2) { @@ -32111,12 +32256,12 @@ ix86_function_versions (tree fn1, tree fn2) result = true; XDELETEVEC (target1); - XDELETEVEC (target2); - + XDELETEVEC (target2); + return result; } -static tree +static tree ix86_mangle_decl_assembler_name (tree decl, tree id) { /* For function version, add the target suffix to the assembler name. */ @@ -32186,7 +32331,7 @@ make_dispatcher_decl (const tree decl) fn_type = TREE_TYPE (decl); func_type = build_function_type (TREE_TYPE (fn_type), TYPE_ARG_TYPES (fn_type)); - + func_decl = build_fn_decl (func_name, func_type); XDELETEVEC (func_name); TREE_USED (func_decl) = 1; @@ -32199,7 +32344,7 @@ make_dispatcher_decl (const tree decl) /* This will be of type IFUNCs have to be externally visible. */ TREE_PUBLIC (func_decl) = 1; - return func_decl; + return func_decl; } #endif @@ -32236,7 +32381,7 @@ ix86_get_function_versions_dispatcher (void *decl) tree dispatch_decl = NULL; struct cgraph_function_version_info *default_version_info = NULL; - + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); node = cgraph_get_node (fn); @@ -32244,7 +32389,7 @@ ix86_get_function_versions_dispatcher (void *decl) node_v = get_cgraph_node_version (node); gcc_assert (node_v != NULL); - + if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; @@ -32409,7 +32554,7 @@ make_resolver_func (const tree default_decl, gcc_assert (dispatch_decl != NULL); /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */ - DECL_ATTRIBUTES (dispatch_decl) + DECL_ATTRIBUTES (dispatch_decl) = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); /* Create the alias for dispatch to resolver here. */ @@ -32424,7 +32569,7 @@ make_resolver_func (const tree default_decl, provide the code to dispatch the right function at run-time. NODE points to the dispatcher decl whose body will be created. */ -static tree +static tree ix86_generate_version_dispatcher_body (void *node_p) { tree resolver_decl; @@ -32476,7 +32621,7 @@ ix86_generate_version_dispatcher_body (void *node_p) } dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); - rebuild_cgraph_edges (); + rebuild_cgraph_edges (); pop_cfun (); return resolver_decl; } @@ -32587,7 +32732,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM15H, M_INTEL_SILVERMONT, M_AMD_BTVER1, - M_AMD_BTVER2, + M_AMD_BTVER2, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -32627,13 +32772,13 @@ fold_builtin_cpu (tree fndecl, tree *args) {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, - {"btver1", M_AMD_BTVER1}, + {"btver1", M_AMD_BTVER1}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, {"bdver4", M_AMDFAM15H_BDVER4}, - {"btver2", M_AMD_BTVER2}, + {"btver2", M_AMD_BTVER2}, }; static struct _isa_names_table @@ -35238,9 +35383,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, { /* Make it call __cpu_indicator_init in libgcc. */ tree call_expr, fndecl, type; - type = build_function_type_list (integer_type_node, NULL_TREE); + type = build_function_type_list (integer_type_node, NULL_TREE); fndecl = build_fn_decl ("__cpu_indicator_init", type); - call_expr = build_call_expr (fndecl, 0); + call_expr = build_call_expr (fndecl, 0); return expand_expr (call_expr, target, mode, EXPAND_NORMAL); } case IX86_BUILTIN_CPU_IS: @@ -41332,8 +41477,8 @@ ix86_encode_section_info (tree decl, rtx rtl, int first) { default_encode_section_info (decl, rtl, first); - if (TREE_CODE (decl) == VAR_DECL - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) + if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) + || TREE_CODE(decl) == STRING_CST) && ix86_in_large_data_p (decl)) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; } @@ -42957,8 +43102,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) op0 = gen_lowpart (V4DImode, d->op0); op1 = gen_lowpart (V4DImode, d->op1); rperm[0] - = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0) - || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0)); + = GEN_INT ((d->perm[0] / (nelt / 2)) + | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); @@ -47277,6 +47422,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_PROFILE_BEFORE_PROLOGUE #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue +#undef TARGET_SET_FP_INSN +#define TARGET_SET_FP_INSN ix86_set_fp_insn + #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name @@ -47562,6 +47710,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required +#undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER +#define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE ix86_can_eliminate @@ -47601,6 +47752,8 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ ix86_float_exceptions_rounding_supported_p +#undef TARGET_STRICT_ALIGN +#define TARGET_STRICT_ALIGN true struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" |