about summary refs log tree commit diff stats
path: root/gcc-4.9/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/gcc/config/i386')
-rw-r--r-- gcc-4.9/gcc/config/i386/driver-i386.c 5
-rw-r--r-- gcc-4.9/gcc/config/i386/i386.c 15
-rw-r--r-- gcc-4.9/gcc/config/i386/i386.h 2
-rw-r--r-- gcc-4.9/gcc/config/i386/i386.md 175
-rw-r--r-- gcc-4.9/gcc/config/i386/sse.md 4
-rw-r--r-- gcc-4.9/gcc/config/i386/x86-tune.def 5
6 files changed, 159 insertions, 47 deletions
diff --git a/gcc-4.9/gcc/config/i386/driver-i386.c b/gcc-4.9/gcc/config/i386/driver-i386.c
index 80f6a0879..722c54692 100644
--- a/gcc-4.9/gcc/config/i386/driver-i386.c
+++ b/gcc-4.9/gcc/config/i386/driver-i386.c
@@ -431,7 +431,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
model = (eax >> 4) & 0x0f;
family = (eax >> 8) & 0x0f;
- if (vendor == signature_INTEL_ebx)
+ if (vendor == signature_INTEL_ebx
+ || vendor == signature_AMD_ebx)
{
unsigned int extended_model, extended_family;
@@ -570,7 +571,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
if (name == signature_NSC_ebx)
processor = PROCESSOR_GEODE;
- else if (has_movbe)
+ else if (has_movbe && family == 22)
processor = PROCESSOR_BTVER2;
else if (has_avx2)
processor = PROCESSOR_BDVER4;
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index 8a1fbde2c..fcd5f0dd1 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -3260,12 +3260,13 @@ ix86_option_override_internal (bool main_args_p,
| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT | PTA_FSGSBASE},
{"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
- | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
+ | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
+ | PTA_MOVBE},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
@@ -45349,8 +45350,10 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
/* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
- /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
- emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
+ /* Multiply lower parts and add all */
+ t5 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_vec_widen_umult_even_v4si (t5, gen_lowpart (V4SImode, op1), gen_lowpart (V4SImode, op2)));
+ op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
}
else
{
diff --git a/gcc-4.9/gcc/config/i386/i386.h b/gcc-4.9/gcc/config/i386/i386.h
index fb527411a..f6b169c24 100644
--- a/gcc-4.9/gcc/config/i386/i386.h
+++ b/gcc-4.9/gcc/config/i386/i386.h
@@ -461,6 +461,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
#define TARGET_ADJUST_UNROLL \
ix86_tune_features[X86_TUNE_ADJUST_UNROLL]
+#define TARGET_AVOID_FALSE_DEP_FOR_BMI \
+ ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 058702904..39d395875 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -111,6 +111,7 @@
UNSPEC_LEA_ADDR
UNSPEC_XBEGIN_ABORT
UNSPEC_STOS
+ UNSPEC_INSN_FALSE_DEP
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@@ -11878,7 +11879,8 @@
DONE;
}
- flags_mode = TARGET_BMI ? CCCmode : CCZmode;
+ flags_mode
+ = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode;
operands[2] = gen_reg_rtx (<MODE>mode);
operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
@@ -11904,7 +11906,8 @@
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
(clobber (reg:CC FLAGS_REG))])]
{
- enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;
+ enum machine_mode flags_mode
+ = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode;
operands[3] = gen_lowpart (QImode, operands[2]);
operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
@@ -11919,7 +11922,7 @@
(const_int 0)))
(set (match_operand:SWI48 0 "register_operand" "=r")
(ctz:SWI48 (match_dup 1)))]
- "TARGET_BMI"
+ "TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI"
"tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
@@ -11940,7 +11943,58 @@
(set_attr "btver2_decode" "double")
(set_attr "mode" "<MODE>")])
-(define_insn "ctz<mode>2"
+(define_expand "ctz<mode>2"
+ [(parallel
+ [(set (match_operand:SWI248 0 "register_operand")
+ (ctz:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))])])
+
+; A false dependency arises when the destination is written only by
+; tzcnt, lzcnt or popcnt.  There is no false dependency when the
+; destination is also used as a source.
+(define_insn_and_split "*ctz<mode>2_falsedep_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ctz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_BMI || TARGET_GENERIC)
+ && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (ctz:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!reg_mentioned_p (operands[0], operands[1]))
+ ix86_expand_clear (operands[0]);
+})
+
+(define_insn "*ctz<mode>2_falsedep"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ctz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_BMI)
+ return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+ else if (TARGET_GENERIC)
+ /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */
+ return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
+ else
+ gcc_unreachable ();
+}
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ctz<mode>2"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
@@ -11987,7 +12041,47 @@
operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
})
-(define_insn "clz<mode>2_lzcnt"
+(define_expand "clz<mode>2_lzcnt"
+ [(parallel
+ [(set (match_operand:SWI248 0 "register_operand")
+ (clz:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_LZCNT")
+
+(define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_LZCNT
+ && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (clz:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!reg_mentioned_p (operands[0], operands[1]))
+ ix86_expand_clear (operands[0]);
+})
+
+(define_insn "*clz<mode>2_lzcnt_falsedep"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_LZCNT"
+ "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*clz<mode>2_lzcnt"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
@@ -12270,10 +12364,39 @@
(set_attr "prefix_0f" "1")
(set_attr "mode" "HI")])
-(define_insn "popcount<mode>2"
- [(set (match_operand:SWI248 0 "register_operand" "=r")
- (popcount:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+(define_expand "popcount<mode>2"
+ [(parallel
+ [(set (match_operand:SWI248 0 "register_operand")
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_POPCNT")
+
+(define_insn_and_split "*popcount<mode>2_falsedep_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT
+ && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (popcount:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!reg_mentioned_p (operands[0], operands[1]))
+ ix86_expand_clear (operands[0]);
+})
+
+(define_insn "*popcount<mode>2_falsedep"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))]
"TARGET_POPCNT"
{
@@ -12287,15 +12410,12 @@
(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
-(define_insn "*popcount<mode>2_cmp"
- [(set (reg FLAGS_REG)
- (compare
- (popcount:SWI248
- (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
- (const_int 0)))
- (set (match_operand:SWI248 0 "register_operand" "=r")
- (popcount:SWI248 (match_dup 1)))]
- "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+(define_insn "*popcount<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
{
#if TARGET_MACHO
return "popcnt\t{%1, %0|%0, %1}";
@@ -12307,25 +12427,6 @@
(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
-(define_insn "*popcountsi2_cmp_zext"
- [(set (reg FLAGS_REG)
- (compare
- (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
- (const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI(popcount:SI (match_dup 1))))]
- "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-{
-#if TARGET_MACHO
- return "popcnt\t{%1, %0|%0, %1}";
-#else
- return "popcnt{l}\t{%1, %0|%0, %1}";
-#endif
-}
- [(set_attr "prefix_rep" "1")
- (set_attr "type" "bitmanip")
- (set_attr "mode" "SI")])
-
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand")
(bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md
index 57e2daa22..4aced2da9 100644
--- a/gcc-4.9/gcc/config/i386/sse.md
+++ b/gcc-4.9/gcc/config/i386/sse.md
@@ -5994,9 +5994,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "vec_extract_lo_<mode><mask_name>"
- [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
(vec_select:<ssehalfvecmode>
- (match_operand:V8FI 1 "nonimmediate_operand" "vm")
+ (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
"TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
diff --git a/gcc-4.9/gcc/config/i386/x86-tune.def b/gcc-4.9/gcc/config/i386/x86-tune.def
index cb44dc312..b7a703fa0 100644
--- a/gcc-4.9/gcc/config/i386/x86-tune.def
+++ b/gcc-4.9/gcc/config/i386/x86-tune.def
@@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
m_K8)
+/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependencies on the
+ destination register of bit-manipulation instructions (tzcnt, lzcnt, popcnt). */
+DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
+ m_SANDYBRIDGE | m_HASWELL | m_INTEL | m_GENERIC)
+
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/