diff options
Diffstat (limited to 'gcc-4.9/gcc/config/i386')
-rw-r--r-- | gcc-4.9/gcc/config/i386/driver-i386.c | 5 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.c | 15 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.md | 175 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/sse.md | 4 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/x86-tune.def | 5 |
6 files changed, 159 insertions, 47 deletions
diff --git a/gcc-4.9/gcc/config/i386/driver-i386.c b/gcc-4.9/gcc/config/i386/driver-i386.c index 80f6a0879..722c54692 100644 --- a/gcc-4.9/gcc/config/i386/driver-i386.c +++ b/gcc-4.9/gcc/config/i386/driver-i386.c @@ -431,7 +431,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) model = (eax >> 4) & 0x0f; family = (eax >> 8) & 0x0f; - if (vendor == signature_INTEL_ebx) + if (vendor == signature_INTEL_ebx + || vendor == signature_AMD_ebx) { unsigned int extended_model, extended_family; @@ -570,7 +571,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (name == signature_NSC_ebx) processor = PROCESSOR_GEODE; - else if (has_movbe) + else if (has_movbe && family == 22) processor = PROCESSOR_BTVER2; else if (has_avx2) processor = PROCESSOR_BDVER4; diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index 8a1fbde2c..fcd5f0dd1 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -3260,12 +3260,13 @@ ix86_option_override_internal (bool main_args_p, | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 - | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR - | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, + | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND + | PTA_MOVBE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW @@ -45349,8 +45350,10 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); - /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ - emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4)); + /* Multiply lower parts and add all */ + t5 = gen_reg_rtx (V2DImode); + emit_insn (gen_vec_widen_umult_even_v4si (t5, gen_lowpart (V4SImode, op1), gen_lowpart (V4SImode, op2))); + op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); } else { diff --git a/gcc-4.9/gcc/config/i386/i386.h b/gcc-4.9/gcc/config/i386/i386.h index fb527411a..f6b169c24 100644 --- a/gcc-4.9/gcc/config/i386/i386.h +++ b/gcc-4.9/gcc/config/i386/i386.h @@ -461,6 +461,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] #define TARGET_ADJUST_UNROLL \ ix86_tune_features[X86_TUNE_ADJUST_UNROLL] +#define TARGET_AVOID_FALSE_DEP_FOR_BMI \ + ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 058702904..39d395875 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -111,6 +111,7 @@ UNSPEC_LEA_ADDR UNSPEC_XBEGIN_ABORT UNSPEC_STOS + UNSPEC_INSN_FALSE_DEP ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -11878,7 +11879,8 @@ DONE; } - flags_mode = TARGET_BMI ? CCCmode : CCZmode; + flags_mode + = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode; operands[2] = gen_reg_rtx (<MODE>mode); operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG); @@ -11904,7 +11906,8 @@ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] { - enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode; + enum machine_mode flags_mode + = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode; operands[3] = gen_lowpart (QImode, operands[2]); operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG); @@ -11919,7 +11922,7 @@ (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1)))] - "TARGET_BMI" + "TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI" "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") @@ -11940,7 +11943,58 @@ (set_attr "btver2_decode" "double") (set_attr "mode" "<MODE>")]) -(define_insn "ctz<mode>2" +(define_expand "ctz<mode>2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (ctz:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])]) + +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. +(define_insn_and_split "*ctz<mode>2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_BMI || TARGET_GENERIC) + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (ctz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!reg_mentioned_p (operands[0], operands[1])) + ix86_expand_clear (operands[0]); +}) + +(define_insn "*ctz<mode>2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + else if (TARGET_GENERIC) + /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */ + return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; + else + gcc_unreachable (); +} + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + +(define_insn "*ctz<mode>2" [(set (match_operand:SWI248 0 "register_operand" "=r") (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] @@ -11987,7 +12041,47 @@ operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); }) -(define_insn "clz<mode>2_lzcnt" +(define_expand "clz<mode>2_lzcnt" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (clz:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_LZCNT") + +(define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!reg_mentioned_p (operands[0], operands[1])) + ix86_expand_clear (operands[0]); +}) + +(define_insn "*clz<mode>2_lzcnt_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT" + "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*clz<mode>2_lzcnt" [(set (match_operand:SWI248 0 "register_operand" "=r") (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] @@ -12270,10 +12364,39 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "HI")]) -(define_insn "popcount<mode>2" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) +(define_expand "popcount<mode>2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_POPCNT") + +(define_insn_and_split "*popcount<mode>2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!reg_mentioned_p (operands[0], operands[1])) + ix86_expand_clear (operands[0]); +}) + +(define_insn "*popcount<mode>2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))] "TARGET_POPCNT" { @@ -12287,15 +12410,12 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcount<mode>2_cmp" - [(set (reg FLAGS_REG) - (compare - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 (match_dup 1)))] - "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +(define_insn "*popcount<mode>2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" { #if TARGET_MACHO return "popcnt\t{%1, %0|%0, %1}"; @@ -12307,25 +12427,6 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcountsi2_cmp_zext" - [(set (reg FLAGS_REG) - (compare - (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI(popcount:SI (match_dup 1))))] - "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" -{ -#if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; -#else - return "popcnt{l}\t{%1, %0|%0, %1}"; -#endif -} - [(set_attr "prefix_rep" "1") - (set_attr "type" "bitmanip") - (set_attr "mode" "SI")]) - (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))] diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md index 57e2daa22..4aced2da9 100644 --- a/gcc-4.9/gcc/config/i386/sse.md +++ b/gcc-4.9/gcc/config/i386/sse.md @@ -5994,9 +5994,9 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "vec_extract_lo_<mode><mask_name>" - [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") + [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v") (vec_select:<ssehalfvecmode> - (match_operand:V8FI 1 "nonimmediate_operand" "vm") + (match_operand:V8FI 1 "nonimmediate_operand" "v,m") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" diff --git a/gcc-4.9/gcc/config/i386/x86-tune.def b/gcc-4.9/gcc/config/i386/x86-tune.def index cb44dc312..b7a703fa0 100644 --- a/gcc-4.9/gcc/config/i386/x86-tune.def +++ b/gcc-4.9/gcc/config/i386/x86-tune.def @@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6) DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", m_K8) +/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. */ +DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", + m_SANDYBRIDGE | m_HASWELL | m_INTEL | m_GENERIC) + /*****************************************************************************/ /* This never worked well before. */ /*****************************************************************************/ |