Diffstat (limited to 'gcc-4.9/gcc/config/i386'):

 gcc-4.9/gcc/config/i386/driver-i386.c |   5
 gcc-4.9/gcc/config/i386/gnu-user.h    |   6
 gcc-4.9/gcc/config/i386/i386-protos.h |  11
 gcc-4.9/gcc/config/i386/i386.c        | 362
 gcc-4.9/gcc/config/i386/i386.md       |  97
 gcc-4.9/gcc/config/i386/i386.opt      |  20
 gcc-4.9/gcc/config/i386/linux.h       |  20
 gcc-4.9/gcc/config/i386/linux64.h     |  19
 gcc-4.9/gcc/config/i386/sse.md        |  64
 9 files changed, 530 insertions(+), 74 deletions(-)
diff --git a/gcc-4.9/gcc/config/i386/driver-i386.c b/gcc-4.9/gcc/config/i386/driver-i386.c
index 1f5a11c9c..80f6a0879 100644
--- a/gcc-4.9/gcc/config/i386/driver-i386.c
+++ b/gcc-4.9/gcc/config/i386/driver-i386.c
@@ -739,6 +739,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Assume Core 2. */
cpu = "core2";
}
+ else if (has_longmode)
+ /* Perhaps some emulator? Assume x86-64, otherwise gcc
+ -march=native would be unusable for 64-bit compilations,
+ as all the CPUs below are 32-bit only. */
+ cpu = "x86-64";
else if (has_sse3)
/* It is Core Duo. */
cpu = "pentium-m";
diff --git a/gcc-4.9/gcc/config/i386/gnu-user.h b/gcc-4.9/gcc/config/i386/gnu-user.h
index d9e3fa434..21b9e9692 100644
--- a/gcc-4.9/gcc/config/i386/gnu-user.h
+++ b/gcc-4.9/gcc/config/i386/gnu-user.h
@@ -70,10 +70,12 @@ along with GCC; see the file COPYING3. If not see
"--32 %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}} " \
LINUX_OR_ANDROID_CC ("", ANDROID_ASM_SPEC)
-#undef SUBTARGET_EXTRA_SPECS
-#define SUBTARGET_EXTRA_SPECS \
+#undef SUBTARGET_EXTRA_SPECS_STR
+#define SUBTARGET_EXTRA_SPECS_STR \
{ "link_emulation", GNU_USER_LINK_EMULATION },\
{ "dynamic_linker", GNU_USER_DYNAMIC_LINKER }
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS SUBTARGET_EXTRA_SPECS_STR
#define GNU_USER_TARGET_LINK_SPEC "-m %(link_emulation) %{shared:-shared} \
%{!shared: \
diff --git a/gcc-4.9/gcc/config/i386/i386-protos.h b/gcc-4.9/gcc/config/i386/i386-protos.h
index 6e3297880..fc0eb53f8 100644
--- a/gcc-4.9/gcc/config/i386/i386-protos.h
+++ b/gcc-4.9/gcc/config/i386/i386-protos.h
@@ -28,6 +28,16 @@ extern bool ix86_target_stack_probe (void);
extern bool ix86_can_use_return_insn_p (void);
extern void ix86_setup_frame_addresses (void);
+/* Section names for the function patch prologue and epilogue backpointer
+ sections. See ix86_output_function_nops_prologue_epilogue() in i386.c. */
+#define FUNCTION_PATCH_PROLOGUE_SECTION "_function_patch_prologue"
+#define FUNCTION_PATCH_EPILOGUE_SECTION "_function_patch_epilogue"
+
+extern bool ix86_output_function_nops_prologue_epilogue (FILE *,
+ const char *,
+ const char *,
+ int);
+
extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
extern void ix86_expand_prologue (void);
extern void ix86_maybe_emit_epilogue_vzeroupper (void);
@@ -312,6 +322,7 @@ extern enum attr_cpu ix86_schedule;
#endif
extern const char * ix86_output_call_insn (rtx insn, rtx call_op);
+extern bool adjacent_mem_locations (rtx mem1, rtx mem2);
#ifdef RTX_CODE
/* Target data for multipass lookahead scheduling.
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index d7c592f48..df504335e 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -78,6 +78,7 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
+#include "cfgloop.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
@@ -5017,8 +5018,11 @@ ix86_in_large_data_p (tree exp)
HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
/* If this is an incomplete type with size 0, then we can't put it
- in data because it might be too big when completed. */
- if (!size || size > ix86_section_threshold)
+ in data because it might be too big when completed. Also,
+ int_size_in_bytes returns -1 if the size can vary or is larger than
+ an integer, in which case it is likewise safer to assume that it
+ goes in large data. */
+ if (size <= 0 || size > ix86_section_threshold)
return true;
}
@@ -11730,6 +11734,246 @@ ix86_expand_epilogue (int style)
m->fs = frame_state_save;
}
+
+/* True if the current function should be patched with nops at prologue and
+ returns. */
+static bool patch_current_function_p = false;
+
+static inline bool
+has_attribute (const char* attribute_name)
+{
+ return lookup_attribute (attribute_name,
+ DECL_ATTRIBUTES (current_function_decl)) != NULL;
+}
+
+/* Return true if the current function should be patched. By default a
+ function is patched if it has loops or if its number of insns is greater
+ than patch_functions_min_instructions (the insn count roughly corresponds
+ to the instruction count). */
+
+static bool
+check_should_patch_current_function (void)
+{
+ int num_insns = 0;
+ rtx insn;
+ const char *func_name = NULL;
+ struct loops *loops;
+ int num_loops = 0;
+ int min_functions_instructions;
+
+ /* If a function has an attribute forcing patching on or off, do as it
+ indicates. */
+ if (has_attribute ("always_patch_for_instrumentation"))
+ return true;
+ else if (has_attribute ("never_patch_for_instrumentation"))
+ return false;
+
+ /* Patch the function if it has at least one loop. */
+ if (!patch_functions_ignore_loops)
+ {
+ if (DECL_STRUCT_FUNCTION (current_function_decl)->cfg)
+ {
+ loops = flow_loops_find (NULL);
+ num_loops = loops->larray->length();
+ /* FIXME: deallocating the loops structure causes a segfault. */
+#if 0
+ flow_loops_free (loops);
+#endif
+ /* We are not concerned with the function body as a loop. */
+ if (num_loops > 1)
+ return true;
+ }
+ }
+
+ /* Otherwise, check if the function has more than patch_functions_min_instructions. */
+
+ /* Borrowed this code from rest_of_handle_final() in final.c. */
+ func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ if (!patch_functions_dont_always_patch_main
+     && func_name
+     && strcmp ("main", func_name) == 0)
+ return true;
+
+ min_functions_instructions =
+ PARAM_VALUE (PARAM_FUNCTION_PATCH_MIN_INSTRUCTIONS);
+ if (min_functions_instructions > 0)
+ {
+ /* Calculate the number of instructions in this function and only emit
+ function patch for instrumentation if it is greater than
+ patch_functions_min_instructions. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (NONDEBUG_INSN_P (insn))
+ ++num_insns;
+ }
+ if (num_insns < min_functions_instructions)
+ return false;
+ }
+
+ return true;
+}
+
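
The two attributes tested above give per-function overrides; a hypothetical
use, assuming the attributes are registered elsewhere in this patch:

    /* Force the 11-byte patch space even for a tiny leaf function.  */
    void tiny_helper (void)
      __attribute__ ((always_patch_for_instrumentation));

    /* Never patch this function, regardless of loops or insn count.  */
    void cold_path (void)
      __attribute__ ((never_patch_for_instrumentation));
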
+/* Emit the 11-byte patch space for the function prologue for functions that
+ qualify. */
+
+static void
+ix86_output_function_prologue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* Only for 64-bit target. */
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ {
+ patch_current_function_p = check_should_patch_current_function();
+ /* Emit the two-byte instruction 'jmp .+9' (0xeb 0x09) followed by 9 more
+ bytes, making an 11-byte patch area. */
+ ix86_output_function_nops_prologue_epilogue (
+ file,
+ FUNCTION_PATCH_PROLOGUE_SECTION,
+ ASM_BYTE"0xeb,0x09",
+ 9);
+ }
+}
+
+/* Emit the nop bytes at function prologue or return (including tail call
+ jumps). The number of nop bytes generated is at least 8.
+ Also emits a section named SECTION_NAME, which is a backpointer section
+ holding the addresses of the nop bytes in the text section.
+ SECTION_NAME is either '_function_patch_prologue' or
+ '_function_patch_epilogue'. The backpointer section can be used to navigate
+ through all the function entry and exit points which are patched with nops.
+ PRE_INSTRUCTIONS are the instructions, if any, at the start of the nop byte
+ sequence. NUM_REMAINING_NOPS is the number of nop bytes to fill,
+ excluding the bytes in PRE_INSTRUCTIONS.
+ Returns true if the function was patched, false otherwise. */
+
+bool
+ix86_output_function_nops_prologue_epilogue (FILE *file,
+ const char *section_name,
+ const char *pre_instructions,
+ int num_remaining_nops)
+{
+ static int labelno = 0;
+ char label[32], section_label[32];
+ section *section = NULL;
+ int num_actual_nops = num_remaining_nops - sizeof(void *);
+ unsigned int section_flags = SECTION_RELRO;
+ char *section_name_comdat = NULL;
+ const char *decl_section_name = NULL;
+ const char *func_name = NULL;
+ char *section_name_function_sections = NULL;
+ size_t len;
+
+ gcc_assert (num_remaining_nops >= 0);
+
+ if (!patch_current_function_p)
+ return false;
+
+ ASM_GENERATE_INTERNAL_LABEL (label, "LFPEL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (section_label, "LFPESL", labelno++);
+
+ /* Align the start of the nops to a 2-byte boundary so that the 2-byte
+ jump instruction can be patched atomically at run time. */
+ ASM_OUTPUT_ALIGN (file, 1);
+
+ /* Emit nop bytes. They look like the following:
+ $LFPEL0:
+ <pre_instruction>
+ 0x90 (repeated num_actual_nops times)
+ .quad $LFPESL0 - .
+ followed by section 'section_name' which contains the address
+ of instruction at 'label'.
+ */
+ ASM_OUTPUT_INTERNAL_LABEL (file, label);
+ if (pre_instructions)
+ fprintf (file, "%s\n", pre_instructions);
+
+ while (num_actual_nops-- > 0)
+ asm_fprintf (file, ASM_BYTE"0x90\n");
+
+ fprintf (file, ASM_QUAD);
+ /* Output "section_label - ." for the relative address of the entry in
+ the section 'section_name'. */
+ assemble_name_raw (file, section_label);
+ fprintf (file, " - .");
+ fprintf (file, "\n");
+
+ /* Emit the backpointer section. For functions belonging to a comdat
+ group, we emit a different section named '<section_name>.foo', where
+ 'foo' is the name of the comdat section. This section is later renamed
+ to '<section_name>' by ix86_elf_asm_named_section().
+ We emit a unique section name for the backpointer section of comdat
+ functions because otherwise the 'get_section' call may return an
+ existing non-comdat section with the same name, leading to references
+ from a non-comdat section to comdat functions.
+ */
+ if (current_function_decl != NULL_TREE
+     && DECL_ONE_ONLY (current_function_decl)
+     && HAVE_COMDAT_GROUP)
+ {
+ decl_section_name =
+ TREE_STRING_POINTER (DECL_SECTION_NAME (current_function_decl));
+ len = strlen (decl_section_name) + strlen (section_name) + 2;
+ section_name_comdat = (char *) alloca (len);
+ sprintf (section_name_comdat, "%s.%s", section_name, decl_section_name);
+ section_name = section_name_comdat;
+ section_flags |= SECTION_LINKONCE;
+ }
+ else if (flag_function_sections)
+ {
+ func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ if (func_name)
+ {
+ len = strlen (func_name) + strlen (section_name) + 2;
+ section_name_function_sections = (char *) alloca (len);
+ sprintf (section_name_function_sections, "%s.%s", section_name,
+ func_name);
+ section_name = section_name_function_sections;
+ }
+ }
+ section = get_section (section_name, section_flags, current_function_decl);
+ switch_to_section (section);
+ /* Align the section to 8-byte boundary. */
+ ASM_OUTPUT_ALIGN (file, 3);
+
+ /* Emit address of the start of nop bytes in the section:
+ $LFPESP0:
+ .quad $LFPEL0
+ */
+ ASM_OUTPUT_INTERNAL_LABEL (file, section_label);
+ fprintf (file, ASM_QUAD);
+ assemble_name_raw (file, label);
+ fprintf (file, "\n");
+
+ /* Switch back to the text section. */
+ switch_to_section (function_section (current_function_decl));
+ return true;
+}
+
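
Putting the pieces together, the output for one patched prologue looks roughly
like this on a 64-bit ELF target (a sketch: label names follow the
LFPEL/LFPESL templates above, section flags abbreviated):

        .p2align 1              # 2-byte alignment, so the jmp can be
    .LFPEL0:                    # rewritten atomically at run time
        .byte 0xeb,0x09         # jmp .+11: skip the 9 bytes below
        .byte 0x90              # 9 - sizeof (void *) = 1 actual nop
        .quad .LFPESL0 - .      # relative backpointer to the section entry
        .section _function_patch_prologue
        .p2align 3
    .LFPESL0:
        .quad .LFPEL0           # address of the patch area
        .text                   # back to the function's section

In the unpatched state the only cost is the two-byte jump; an instrumentation
tool walks the backpointer sections to locate every entry and exit patch area.
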
+/* Strip anything following '_function_patch_prologue' or
+ '_function_patch_epilogue' from NAME and emit the section. */
+
+static void
+ix86_elf_asm_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ const char *section_name = name;
+ if (!flag_function_sections && HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
+ {
+ const int prologue_section_name_length =
+ sizeof(FUNCTION_PATCH_PROLOGUE_SECTION) - 1;
+ const int epilogue_section_name_length =
+ sizeof(FUNCTION_PATCH_EPILOGUE_SECTION) - 1;
+
+ if (strncmp (name, FUNCTION_PATCH_PROLOGUE_SECTION,
+ prologue_section_name_length) == 0)
+ section_name = FUNCTION_PATCH_PROLOGUE_SECTION;
+ else if (strncmp (name, FUNCTION_PATCH_EPILOGUE_SECTION,
+ epilogue_section_name_length) == 0)
+ section_name = FUNCTION_PATCH_EPILOGUE_SECTION;
+ }
+ default_elf_asm_named_section (section_name, flags, decl);
+}
+
/* Reset from the function's potential modifications. */
static void
@@ -12659,7 +12903,9 @@ legitimate_pic_address_disp_p (rtx disp)
return true;
}
else if (!SYMBOL_REF_FAR_ADDR_P (op0)
- && SYMBOL_REF_LOCAL_P (op0)
+ && (SYMBOL_REF_LOCAL_P (op0)
+ || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie
+ && !SYMBOL_REF_FUNCTION_P (op0)))
&& ix86_cmodel != CM_LARGE_PIC)
return true;
break;
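
Illustration of the effect, an assumption about the generated code rather than
a quote from it: for 'extern int counter;' read under -fpie -mcopyrelocs on
x86-64, the symbol may now be accessed directly,

    movl   counter(%rip), %eax    # linker adds a copy relocation if
                                  # counter turns out to live in a DSO

instead of through the GOT:

    movq   counter@GOTPCREL(%rip), %rax
    movl   (%rax), %eax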
@@ -21507,7 +21753,7 @@ ix86_expand_vec_perm (rtx operands[])
t1 = gen_reg_rtx (V32QImode);
t2 = gen_reg_rtx (V32QImode);
t3 = gen_reg_rtx (V32QImode);
- vt2 = GEN_INT (128);
+ vt2 = GEN_INT (-128);
for (i = 0; i < 32; i++)
vec[i] = vt2;
vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
@@ -23794,7 +24040,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
{
const struct stringop_algs * algs;
bool optimize_for_speed;
- int max = -1;
+ int max = 0;
const struct processor_costs *cost;
int i;
bool any_alg_usable_p = false;
@@ -23832,7 +24078,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* If expected size is not known but max size is small enough
so inline version is a win, set expected size into
the range. */
- if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
+ if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
&& expected_size == -1)
expected_size = min_size / 2 + max_size / 2;
@@ -23921,7 +24167,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*dynamic_check = 128;
return loop_1_byte;
}
- if (max == -1)
+ if (max <= 0)
max = 4096;
alg = decide_alg (count, max / 2, min_size, max_size, memset,
zero_memset, dynamic_check, noalign);
@@ -24945,6 +25191,15 @@ ix86_output_call_insn (rtx insn, rtx call_op)
else
xasm = "jmp\t%A0";
+ /* Just before the sibling call, add 11 bytes of nops to patch the function
+ exit: 2 bytes for 'jmp .+9' plus 9 more bytes. */
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ ix86_output_function_nops_prologue_epilogue (
+ asm_out_file,
+ FUNCTION_PATCH_EPILOGUE_SECTION,
+ ASM_BYTE"0xeb, 0x09",
+ 9);
+
output_asm_insn (xasm, &call_op);
return "";
}
@@ -26238,13 +26493,17 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
edge e;
edge_iterator ei;
- /* Assume that region is SCC, i.e. all immediate predecessors
- of non-head block are in the same region. */
+
+ /* Regions are SCCs with the exception of selective
+ scheduling with pipelining of outer blocks enabled.
+ So also check that immediate predecessors of a non-head
+ block are in the same region. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
/* Avoid creating of loop-carried dependencies through
- using topological odering in region. */
- if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
+ using topological ordering in the region. */
+ if (rgn == CONTAINING_RGN (e->src->index)
+ && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
add_dependee_for_func_arg (first_arg, e->src);
}
}
@@ -28789,7 +29048,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins_isa[(int) code].isa = mask;
mask &= ~OPTION_MASK_ISA_64BIT;
- if (mask == 0
+ if (flag_dyn_ipa
+ || mask == 0
|| (mask & ix86_isa_flags) != 0
|| (lang_hooks.builtin_function
== lang_hooks.builtin_function_ext_scope))
@@ -37802,10 +38062,10 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
*total = 2;
else if (flag_pic && SYMBOLIC_CONST (x)
- && (!TARGET_64BIT
- || (!GET_CODE (x) != LABEL_REF
- && (GET_CODE (x) != SYMBOL_REF
- || !SYMBOL_REF_LOCAL_P (x)))))
+ && !(TARGET_64BIT
+ && (GET_CODE (x) == LABEL_REF
+ || (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_LOCAL_P (x)))))
*total = 1;
else
*total = 0;
@@ -46745,6 +47005,70 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
atomic_feraiseexcept_call);
}
+/* Try to determine the BASE/OFFSET/SIZE parts of the given MEM.
+ Return true if successful, false if the values cannot all be
+ determined.
+
+ This function only looks for REG/SYMBOL or REG/SYMBOL+CONST
+ address forms. */
+
+static bool
+get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
+ HOST_WIDE_INT *size)
+{
+ rtx addr_rtx;
+ if (MEM_SIZE_KNOWN_P (mem))
+ *size = MEM_SIZE (mem);
+ else
+ return false;
+
+ if (GET_CODE (XEXP (mem, 0)) == CONST)
+ addr_rtx = XEXP (XEXP (mem, 0), 0);
+ else
+ addr_rtx = XEXP (mem, 0);
+
+ if (GET_CODE (addr_rtx) == REG
+ || GET_CODE (addr_rtx) == SYMBOL_REF)
+ {
+ *base = addr_rtx;
+ *offset = 0;
+ }
+ else if (GET_CODE (addr_rtx) == PLUS
+ && CONST_INT_P (XEXP (addr_rtx, 1)))
+ {
+ *base = XEXP (addr_rtx, 0);
+ *offset = INTVAL (XEXP (addr_rtx, 1));
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/* Return true if MEM1 is immediately below and adjacent to MEM2,
+ i.e. MEM1's end address is MEM2's start address. */
+
+bool
+adjacent_mem_locations (rtx mem1, rtx mem2)
+{
+ rtx base1, base2;
+ HOST_WIDE_INT off1, size1, off2, size2;
+
+ if (get_memref_parts (mem1, &base1, &off1, &size1)
+ && get_memref_parts (mem2, &base2, &off2, &size2))
+ {
+ if (GET_CODE (base1) == SYMBOL_REF
+ && GET_CODE (base2) == SYMBOL_REF
+ && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2))
+ return (off1 + size1 == off2);
+ else if (REG_P (base1)
+ && REG_P (base2)
+ && REGNO (base1) == REGNO (base2))
+ return (off1 + size1 == off2);
+ }
+ return false;
+}
+
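
For example (illustrative RTL), these two stack slots satisfy
off1 + size1 == off2 with the same base register, so adjacent_mem_locations
returns true:

    (mem:DF (plus:DI (reg:DI sp) (const_int 8)))    ;; offset 8,  size 8
    (mem:DF (plus:DI (reg:DI sp) (const_int 16)))   ;; offset 16, size 8

The SYMBOL_REF arm additionally requires both references to resolve to the
same declaration, so x+0 and x+8 pair up while x+8 and y+0 do not.
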
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@@ -46787,9 +47111,15 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue
+
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION ix86_elf_asm_named_section
+
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 9f103cf30..058702904 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -3201,7 +3201,7 @@
(const_string "1")
(const_string "*")))
(set (attr "mode")
- (cond [(eq_attr "alternative" "3,4,9,10,13,14,15")
+ (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15")
(const_string "SI")
(eq_attr "alternative" "11")
(const_string "DI")
@@ -4933,66 +4933,37 @@
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
-;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC
-;; targets benefit from this optimization. Also note that fild
-;; loads from memory only.
+;; SImode values to stack. Also note that fild loads from memory only.
-(define_insn "*floatunssi<mode>2_1"
- [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+(define_insn_and_split "*floatunssi<mode>2_i387_with_xmm"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
(unsigned_float:X87MODEF
- (match_operand:SI 1 "nonimmediate_operand" "x,m")))
- (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
- (clobber (match_scratch:SI 3 "=X,x"))]
+ (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:DI 3 "=x"))
+ (clobber (match_operand:DI 2 "memory_operand" "=m"))]
"!TARGET_64BIT
&& TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE"
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
"#"
- [(set_attr "type" "multi")
- (set_attr "mode" "<MODE>")])
-
-(define_split
- [(set (match_operand:X87MODEF 0 "register_operand")
- (unsigned_float:X87MODEF
- (match_operand:SI 1 "register_operand")))
- (clobber (match_operand:DI 2 "memory_operand"))
- (clobber (match_scratch:SI 3))]
- "!TARGET_64BIT
- && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE
- && reload_completed"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0)
- (float:X87MODEF (match_dup 2)))]
- "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
-
-(define_split
- [(set (match_operand:X87MODEF 0 "register_operand")
- (unsigned_float:X87MODEF
- (match_operand:SI 1 "memory_operand")))
- (clobber (match_operand:DI 2 "memory_operand"))
- (clobber (match_scratch:SI 3))]
- "!TARGET_64BIT
- && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE
- && reload_completed"
- [(set (match_dup 2) (match_dup 3))
+ "&& reload_completed"
+ [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
+ (set (match_dup 2) (match_dup 3))
(set (match_dup 0)
(float:X87MODEF (match_dup 2)))]
-{
- emit_move_insn (operands[3], operands[1]);
- operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
-})
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
(define_expand "floatunssi<mode>2"
[(parallel
[(set (match_operand:X87MODEF 0 "register_operand")
(unsigned_float:X87MODEF
(match_operand:SI 1 "nonimmediate_operand")))
- (clobber (match_dup 2))
- (clobber (match_scratch:SI 3))])]
+ (clobber (match_scratch:DI 3))
+ (clobber (match_dup 2))])]
"!TARGET_64BIT
&& ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE)
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
@@ -9627,7 +9598,7 @@
(define_insn "x86_64_shrd"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
- (ior:DI (ashiftrt:DI (match_dup 0)
+ (ior:DI (lshiftrt:DI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Jc"))
(ashift:DI (match_operand:DI 1 "register_operand" "r")
(minus:QI (const_int 64) (match_dup 2)))))
@@ -9643,7 +9614,7 @@
(define_insn "x86_shrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
- (ior:SI (ashiftrt:SI (match_dup 0)
+ (ior:SI (lshiftrt:SI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Ic"))
(ashift:SI (match_operand:SI 1 "register_operand" "r")
(minus:QI (const_int 32) (match_dup 2)))))
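
SHRD fills the bits vacated at the top of operand 0 from operand 1, so the old
value of operand 0 is shifted logically; describing it as an arithmetic shift
would let the RTL optimizers make unsound deductions about the high bits. A
worked 32-bit example with illustrative values:

    ;; shrd $4, %edx, %eax   with  eax = 0x80000000, edx = 0x00000001
    ;;   lshiftrt: 0x80000000 >> 4 = 0x08000000   (matches the hardware)
    ;;   ashiftrt: 0x80000000 >> 4 = 0xf8000000   (wrong: sign bit smeared)
    ;;   result:   0x08000000 | (0x00000001 << 28) = 0x18000000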
@@ -10095,13 +10066,13 @@
[(set (match_dup 3) (match_dup 4))
(parallel
[(set (match_dup 4)
- (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+ (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
(ashift:DWIH (match_dup 5)
(minus:QI (match_dup 6) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 5)
- (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+ (ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2))
(ashift:DWIH (match_dup 3)
(minus:QI (match_dup 6) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))])]
@@ -11611,7 +11582,18 @@
(define_insn "simple_return_internal"
[(simple_return)]
"reload_completed"
- "ret"
+{
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ {
+ /* Emit 10 nop bytes after ret. */
+ if (ix86_output_function_nops_prologue_epilogue (asm_out_file,
+ FUNCTION_PATCH_EPILOGUE_SECTION,
+ "\tret",
+ 10))
+ return "";
+ }
+ return "ret";
+}
[(set_attr "length" "1")
(set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
@@ -11624,7 +11606,18 @@
[(simple_return)
(unspec [(const_int 0)] UNSPEC_REP)]
"reload_completed"
- "rep%; ret"
+{
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ {
+ /* Emit 9 nop bytes after rep;ret. */
+ if (ix86_output_function_nops_prologue_epilogue (asm_out_file,
+ FUNCTION_PATCH_EPILOGUE_SECTION,
+ "\trep\;ret",
+ 9))
+ return "";
+ }
+ return "rep\;ret";
+}
[(set_attr "length" "2")
(set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt
index 0f463a238..1e00b660e 100644
--- a/gcc-4.9/gcc/config/i386/i386.opt
+++ b/gcc-4.9/gcc/config/i386/i386.opt
@@ -108,6 +108,10 @@ int x_ix86_dump_tunes
TargetSave
int x_ix86_force_align_arg_pointer
+;; -mcopyrelocs=
+TargetSave
+int x_ix86_copyrelocs
+
;; -mforce-drap=
TargetSave
int x_ix86_force_drap
@@ -291,6 +295,10 @@ mfancy-math-387
Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
Generate sin, cos, sqrt for FPU
+mcopyrelocs
+Target Report Var(ix86_pie_copyrelocs) Init(0)
+Assume support for copy relocations when building PIE binaries.
+
mforce-drap
Target Report Var(ix86_force_drap)
Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack
@@ -781,6 +789,18 @@ mrtm
Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save
Support RTM built-in functions and code generation
+mpatch-functions-for-instrumentation
+Target RejectNegative Report Var(patch_functions_for_instrumentation) Save
+Patch function prologue and epilogue with custom NOPs for dynamic instrumentation. By default, functions with loops (see -mpatch-functions-ignore-loops) or functions with more instructions than -mpatch-functions-min-instructions are patched.
+
+mpatch-functions-ignore-loops
+Target RejectNegative Report Var(patch_functions_ignore_loops) Save
+Ignore loops when deciding whether to patch a function for instrumentation (for use with -mpatch-functions-for-instrumentation).
+
+mno-patch-functions-main-always
+Target Report RejectNegative Var(patch_functions_dont_always_patch_main) Save
+Treat 'main' as any other function and only patch it if it meets the criteria for loops and minimum number of instructions (for use with -mpatch-functions-for-instrumentation).
+
mstack-protector-guard=
Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS)
Use given stack-protector guard
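
A hypothetical invocation combining the new instrumentation options; the
threshold is read through PARAM_FUNCTION_PATCH_MIN_INSTRUCTIONS in i386.c,
and the --param spelling shown here is assumed:

    gcc -m64 -mpatch-functions-for-instrumentation \
        -mpatch-functions-ignore-loops \
        --param function-patch-min-instructions=200 foo.c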
diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h
index 1fb1e0321..27d68b5db 100644
--- a/gcc-4.9/gcc/config/i386/linux.h
+++ b/gcc-4.9/gcc/config/i386/linux.h
@@ -20,4 +20,22 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define GNU_USER_LINK_EMULATION "elf_i386"
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
+
+/* These may be provided by config/linux-grtev*.h. */
+#ifndef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#ifndef SUBTARGET_EXTRA_SPECS_STR
+#define SUBTARGET_EXTRA_SPECS \
+ LINUX_GRTE_EXTRA_SPECS
+#else
+#define SUBTARGET_EXTRA_SPECS \
+ LINUX_GRTE_EXTRA_SPECS \
+ SUBTARGET_EXTRA_SPECS_STR
+#endif
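
Together with the gnu-user.h change above, the two-level macro composes so
that a GRTE config header can prepend spec entries without clobbering the base
ones. A sketch of the result, where the GRTE entry is hypothetical and
LINUX_GRTE_EXTRA_SPECS defaults to empty:

    /* SUBTARGET_EXTRA_SPECS now expands to:
         { "grte_spec", "..." },                       <- from linux-grtev*.h
         { "link_emulation", GNU_USER_LINK_EMULATION },
         { "dynamic_linker", GNU_USER_DYNAMIC_LINKER }
       and RUNTIME_ROOT_PREFIX (e.g. building GCC with
       -DRUNTIME_ROOT_PREFIX='"/grte/v4"') relocates the dynamic linker
       path baked into that last entry.  */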
diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h
index a90171e8c..5124a341b 100644
--- a/gcc-4.9/gcc/config/i386/linux64.h
+++ b/gcc-4.9/gcc/config/i386/linux64.h
@@ -27,6 +27,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define GNU_USER_LINK_EMULATION64 "elf_x86_64"
#define GNU_USER_LINK_EMULATIONX32 "elf32_x86_64"
-#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2"
-#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux-x86-64.so.2"
-#define GLIBC_DYNAMIC_LINKERX32 "/libx32/ld-linux-x32.so.2"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2"
+#define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2"
+
+/* These may be provided by config/linux-grtev*.h. */
+#ifndef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ LINUX_GRTE_EXTRA_SPECS
+
diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md
index 72a4d6d07..27ade1964 100644
--- a/gcc-4.9/gcc/config/i386/sse.md
+++ b/gcc-4.9/gcc/config/i386/sse.md
@@ -8255,6 +8255,36 @@
DONE;
})
+(define_expand "usadv16qi"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand")
+ (match_operand:V16QI 2 "nonimmediate_operand")
+ (match_operand:V4SI 3 "nonimmediate_operand")]
+ "TARGET_SSE2"
+{
+ rtx t1 = gen_reg_rtx (V2DImode);
+ rtx t2 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
+ DONE;
+})
+
+(define_expand "usadv32qi"
+ [(match_operand:V8SI 0 "register_operand")
+ (match_operand:V32QI 1 "register_operand")
+ (match_operand:V32QI 2 "nonimmediate_operand")
+ (match_operand:V8SI 3 "nonimmediate_operand")]
+ "TARGET_AVX2"
+{
+ rtx t1 = gen_reg_rtx (V4DImode);
+ rtx t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
+ DONE;
+})
+
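
In intrinsics form the V16QI expander is roughly the following, an
illustrative sketch rather than GCC's internal code: psadbw leaves one
zero-extended 16-bit sum of absolute byte differences per 64-bit lane, so the
V2DI result can be viewed as V4SI and added lane-wise into the accumulator.

    #include <emmintrin.h>

    static __m128i
    usadv16qi_sketch (__m128i a, __m128i b, __m128i acc)
    {
      /* Two 64-bit lanes, each a zero-extended sum of 8 byte differences.  */
      __m128i sad = _mm_sad_epu8 (a, b);
      /* The upper bits of each lane are zero, so a 32-bit lane-wise add
         folds the sums into the accumulator.  */
      return _mm_add_epi32 (acc, sad);
    }
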
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
@@ -15606,3 +15636,37 @@
[(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+
+;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+;; is true.
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand")
+ (match_operand:DF 1 "memory_operand"))
+ (set (match_operand:V2DF 2 "register_operand")
+ (vec_concat:V2DF (match_dup 0)
+ (match_operand:DF 3 "memory_operand")))]
+ "TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && adjacent_mem_locations (operands[1], operands[3])"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
+{
+ operands[4] = gen_rtx_MEM (V2DFmode, XEXP (operands[1], 0));
+})
+
+;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+;; is true.
+(define_peephole2
+ [(set (match_operand:DF 0 "memory_operand")
+ (vec_select:DF (match_operand:V2DF 1 "register_operand")
+ (parallel [(const_int 0)])))
+ (set (match_operand:DF 2 "memory_operand")
+ (vec_select:DF (match_dup 1)
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+ && adjacent_mem_locations (operands[0], operands[2])"
+ [(set (match_dup 3)
+ (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
+{
+ operands[3] = gen_rtx_MEM (V2DFmode, XEXP (operands[0], 0));
+})
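
The store peephole, for instance, merges a split 16-byte store such as the
following (illustrative registers and offsets; the load case is symmetric,
pairing a movsd/movhpd load into one movupd):

    movlpd  %xmm0, 8(%rsp)      # store the low V2DF half
    movhpd  %xmm0, 16(%rsp)     # store the high half to the adjacent slot
        =>
    movupd  %xmm0, 8(%rsp)      # one unaligned 16-byte store

This is a win precisely when TARGET_SSE_UNALIGNED_STORE_OPTIMAL reports that
an unaligned 16-byte access is as cheap as two 8-byte ones.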