aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.8
diff options
context:
space:
mode:
author Andrew Hsieh <andrewhsieh@google.com> 2014-06-18 09:53:32 -0700
committer Pavel Chupin <pavel.v.chupin@intel.com> 2014-06-19 22:38:12 +0400
commit f1fba0f5b24322265b9c3d37863a504c4b67cdc2 (patch)
tree 034f988a2cfa56e4d20f358a68cb122f801832ea /gcc-4.8
parent 6179a54ad632315562364c24533134f38c366156 (diff)
download toolchain_gcc-f1fba0f5b24322265b9c3d37863a504c4b67cdc2.tar.gz
toolchain_gcc-f1fba0f5b24322265b9c3d37863a504c4b67cdc2.tar.bz2
toolchain_gcc-f1fba0f5b24322265b9c3d37863a504c4b67cdc2.zip
Revert 01b34967a57ca33621130d36e007214b93bdfeaa

Reverted for the purpose of merging gcc-4.8.3; it will be put back afterwards.

https://android-review.googlesource.com/#/c/60083
[4.7, 4.8] Extended Silvermont tuning

Change-Id: If13a6989286c0ab57def1dd65f0bbd2f6ed8d807
Diffstat (limited to 'gcc-4.8')
-rw-r--r-- gcc-4.8/gcc/config/i386/i386.c 372
-rw-r--r-- gcc-4.8/gcc/config/i386/i386.h 3
-rw-r--r-- gcc-4.8/gcc/config/i386/i386.md 24
-rw-r--r-- gcc-4.8/libgcc/config/i386/cpuinfo.c 4
4 files changed, 80 insertions, 323 deletions
diff --git a/gcc-4.8/gcc/config/i386/i386.c b/gcc-4.8/gcc/config/i386/i386.c
index 7e8f653b6..0c5b57d3f 100644
--- a/gcc-4.8/gcc/config/i386/i386.c
+++ b/gcc-4.8/gcc/config/i386/i386.c
@@ -2103,12 +2103,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
a conditional move. */
- m_ATOM,
-
- /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
- fp converts to destination register. */
- m_SLM
-
+ m_ATOM
};
/* Feature tests against the various architecture variations. */
@@ -17317,24 +17312,10 @@ distance_agu_use (unsigned int regno0, rtx insn)
static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
- unsigned int regno2, int split_cost, bool has_scale)
+ unsigned int regno2, int split_cost)
{
int dist_define, dist_use;
- /* For Silvermont if using a 2-source or 3-source LEA for
- non-destructive destination purposes, or due to wanting
- ability to use SCALE, the use of LEA is justified. */
- if (ix86_tune == PROCESSOR_SLM)
- {
- if (has_scale)
- return true;
- if (split_cost < 1)
- return false;
- if (regno0 == regno1 || regno0 == regno2)
- return false;
- return true;
- }
-
dist_define = distance_non_agu_define (regno1, regno2, insn);
dist_use = distance_agu_use (regno0, insn);
@@ -17423,7 +17404,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[])
if (regno0 == regno1 || regno0 == regno2)
return false;
else
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
@@ -17445,7 +17426,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[])
regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
- return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
}
/* Return true if we need to split lea into a sequence of
@@ -17524,8 +17505,7 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
split_cost -= 1;
}
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
- parts.scale > 1);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
@@ -17710,7 +17690,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
+ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
@@ -24309,73 +24289,6 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn)
return false;
}
-/* Helper function for exact_store_load_dependency.
- Return true if addr is found in insn. */
-static bool
-exact_dependency_1 (rtx addr, rtx insn)
-{
- enum rtx_code code;
- const char *format_ptr;
- int i, j;
-
- code = GET_CODE (insn);
- switch (code)
- {
- case MEM:
- if (rtx_equal_p (addr, insn))
- return true;
- break;
- case REG:
- CASE_CONST_ANY:
- case SYMBOL_REF:
- case CODE_LABEL:
- case PC:
- case CC0:
- case EXPR_LIST:
- return false;
- default:
- break;
- }
-
- format_ptr = GET_RTX_FORMAT (code);
- for (i = 0; i < GET_RTX_LENGTH (code); i++)
- {
- switch (*format_ptr++)
- {
- case 'e':
- if (exact_dependency_1 (addr, XEXP (insn, i)))
- return true;
- break;
- case 'E':
- for (j = 0; j < XVECLEN (insn, i); j++)
- if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
- return true;
- break;
- }
- }
- return false;
-}
-
-/* Return true if there exists exact dependency for store & load, i.e.
- the same memory address is used in them. */
-static bool
-exact_store_load_dependency (rtx store, rtx load)
-{
- rtx set1, set2;
-
- set1 = single_set (store);
- if (!set1)
- return false;
- if (!MEM_P (SET_DEST (set1)))
- return false;
- set2 = single_set (load);
- if (!set2)
- return false;
- if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
- return true;
- return false;
-}
-
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
@@ -24527,39 +24440,6 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
else
cost = 0;
}
- break;
-
- case PROCESSOR_SLM:
- if (!reload_completed)
- return cost;
-
- /* Increase cost of integer loads. */
- memory = get_attr_memory (dep_insn);
- if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- {
- enum attr_unit unit = get_attr_unit (dep_insn);
- if (unit == UNIT_INTEGER && cost == 1)
- {
- if (memory == MEMORY_LOAD)
- cost = 3;
- else
- {
- /* Increase cost of ld/st for short int types only
- because of store forwarding issue. */
- rtx set = single_set (dep_insn);
- if (set && (GET_MODE (SET_DEST (set)) == QImode
- || GET_MODE (SET_DEST (set)) == HImode))
- {
- /* Increase cost of store/load insn if exact
- dependence exists and it is load insn. */
- enum attr_memory insn_memory = get_attr_memory (insn);
- if (insn_memory == MEMORY_LOAD
- && exact_store_load_dependency (dep_insn, insn))
- cost = 3;
- }
- }
- }
- }
default:
break;
@@ -24606,204 +24486,110 @@ ia32_multipass_dfa_lookahead (void)
execution. It is applied if
(1) IMUL instruction is on the top of list;
(2) There exists the only producer of independent IMUL instruction in
- ready list.
- Return index of IMUL producer if it was found and -1 otherwise. */
+ ready list;
+ (3) Put found producer on the top of ready list.
+ Returns issue rate. */
+
static int
-do_reorder_for_imul (rtx *ready, int n_ready)
+ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
+ int clock_var ATTRIBUTE_UNUSED)
{
- rtx insn, set, insn1, insn2;
+ static int issue_rate = -1;
+ int n_ready = *pn_ready;
+ rtx insn, insn1, insn2;
+ int i;
sd_iterator_def sd_it;
dep_t dep;
int index = -1;
- int i;
+ /* Set up issue rate. */
+ issue_rate = ix86_issue_rate();
+
+ /* Do reodering for Atom only. */
if (ix86_tune != PROCESSOR_ATOM)
- return index;
+ return issue_rate;
+ /* Do not perform ready list reodering for pre-reload schedule pass. */
+ if (!reload_completed)
+ return issue_rate;
+ /* Nothing to do if ready list contains only 1 instruction. */
+ if (n_ready <= 1)
+ return issue_rate;
/* Check that IMUL instruction is on the top of ready list. */
insn = ready[n_ready - 1];
- set = single_set (insn);
- if (!set)
- return index;
- if (!(GET_CODE (SET_SRC (set)) == MULT
- && GET_MODE (SET_SRC (set)) == SImode))
- return index;
+ if (!NONDEBUG_INSN_P (insn))
+ return issue_rate;
+ insn = PATTERN (insn);
+ if (GET_CODE (insn) == PARALLEL)
+ insn = XVECEXP (insn, 0, 0);
+ if (GET_CODE (insn) != SET)
+ return issue_rate;
+ if (!(GET_CODE (SET_SRC (insn)) == MULT
+ && GET_MODE (SET_SRC (insn)) == SImode))
+ return issue_rate;
/* Search for producer of independent IMUL instruction. */
- for (i = n_ready - 2; i >= 0; i--)
+ for (i = n_ready - 2; i>= 0; i--)
{
insn = ready[i];
if (!NONDEBUG_INSN_P (insn))
- continue;
+ continue;
/* Skip IMUL instruction. */
insn2 = PATTERN (insn);
if (GET_CODE (insn2) == PARALLEL)
- insn2 = XVECEXP (insn2, 0, 0);
+ insn2 = XVECEXP (insn2, 0, 0);
if (GET_CODE (insn2) == SET
- && GET_CODE (SET_SRC (insn2)) == MULT
- && GET_MODE (SET_SRC (insn2)) == SImode)
- continue;
+ && GET_CODE (SET_SRC (insn2)) == MULT
+ && GET_MODE (SET_SRC (insn2)) == SImode)
+ continue;
FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
- {
- rtx con;
+ {
+ rtx con;
con = DEP_CON (dep);
if (!NONDEBUG_INSN_P (con))
continue;
- insn1 = PATTERN (con);
- if (GET_CODE (insn1) == PARALLEL)
- insn1 = XVECEXP (insn1, 0, 0);
+ insn1 = PATTERN (con);
+ if (GET_CODE (insn1) == PARALLEL)
+ insn1 = XVECEXP (insn1, 0, 0);
- if (GET_CODE (insn1) == SET
- && GET_CODE (SET_SRC (insn1)) == MULT
- && GET_MODE (SET_SRC (insn1)) == SImode)
- {
- sd_iterator_def sd_it1;
- dep_t dep1;
- /* Check if there is no other dependee for IMUL. */
- index = i;
- FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
- {
- rtx pro;
- pro = DEP_PRO (dep1);
+ if (GET_CODE (insn1) == SET
+ && GET_CODE (SET_SRC (insn1)) == MULT
+ && GET_MODE (SET_SRC (insn1)) == SImode)
+ {
+ sd_iterator_def sd_it1;
+ dep_t dep1;
+ /* Check if there is no other dependee for IMUL. */
+ index = i;
+ FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep1);
if (!NONDEBUG_INSN_P (pro))
continue;
- if (pro != insn)
- index = -1;
- }
- if (index >= 0)
- break;
- }
- }
+ if (pro != insn)
+ index = -1;
+ }
+ if (index >= 0)
+ break;
+ }
+ }
if (index >= 0)
- break;
- }
- return index;
-}
-
-/* Try to find the best candidate on the top of ready list if two insns
- have the same priority - candidate is best if its dependees were
- scheduled earlier. Applied for Silvermont only.
- Return true if top 2 insns must be interchanged. */
-static bool
-swap_top_of_ready_list (rtx *ready, int n_ready)
-{
- rtx top = ready[n_ready - 1];
- rtx next = ready[n_ready - 2];
- rtx set;
- sd_iterator_def sd_it;
- dep_t dep;
- int clock1 = -1;
- int clock2 = -1;
- #define INSN_TICK(INSN) (HID (INSN)->tick)
-
- if (ix86_tune != PROCESSOR_SLM)
- return false;
-
- if (!NONDEBUG_INSN_P (top))
- return false;
- if (!NONJUMP_INSN_P (top))
- return false;
- if (!NONDEBUG_INSN_P (next))
- return false;
- if (!NONJUMP_INSN_P (next))
- return false;
- set = single_set (top);
- if (!set)
- return false;
- set = single_set (next);
- if (!set)
- return false;
-
- if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
- {
- if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
- return false;
- /* Determine winner more precise. */
- FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
- {
- rtx pro;
- pro = DEP_PRO (dep);
- if (!NONDEBUG_INSN_P (pro))
- continue;
- if (INSN_TICK (pro) > clock1)
- clock1 = INSN_TICK (pro);
- }
- FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
- {
- rtx pro;
- pro = DEP_PRO (dep);
- if (!NONDEBUG_INSN_P (pro))
- continue;
- if (INSN_TICK (pro) > clock2)
- clock2 = INSN_TICK (pro);
- }
-
- if (clock1 == clock2)
- {
- /* Determine winner - load must win. */
- enum attr_memory memory1, memory2;
- memory1 = get_attr_memory (top);
- memory2 = get_attr_memory (next);
- if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
- return true;
- }
- return (bool) (clock2 < clock1);
+ break;
}
- return false;
- #undef INSN_TICK
-}
-
-/* Perform possible reodering of ready list for Atom/Silvermont only.
- Return issue rate. */
-static int
-ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
- int clock_var)
-{
- int issue_rate = -1;
- int n_ready = *pn_ready;
- int i;
- rtx insn;
- int index = -1;
-
- /* Set up issue rate. */
- issue_rate = ix86_issue_rate ();
+ if (index < 0)
+ return issue_rate; /* Didn't find IMUL producer. */
- /* Do reodering for Atom/SLM only. */
- if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
- return issue_rate;
+ if (sched_verbose > 1)
+ fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
+ INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
- /* Nothing to do if ready list contains only 1 instruction. */
- if (n_ready <= 1)
- return issue_rate;
+ /* Put IMUL producer (ready[index]) at the top of ready list. */
+ insn1= ready[index];
+ for (i = index; i < n_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[n_ready - 1] = insn1;
- /* Do reodering for post-reload scheduler only. */
- if (!reload_completed)
- return issue_rate;
-
- if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
- {
- if (sched_verbose > 1)
- fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
- INSN_UID (ready[index]));
-
- /* Put IMUL producer (ready[index]) at the top of ready list. */
- insn = ready[index];
- for (i = index; i < n_ready - 1; i++)
- ready[i] = ready[i + 1];
- ready[n_ready - 1] = insn;
- return issue_rate;
- }
- if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
- {
- if (sched_verbose > 1)
- fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
- INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
- /* Swap 2 top elements of ready list. */
- insn = ready[n_ready - 1];
- ready[n_ready - 1] = ready[n_ready - 2];
- ready[n_ready - 2] = insn;
- }
return issue_rate;
}
@@ -29964,11 +29750,11 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_AMD,
M_CPU_TYPE_START,
M_INTEL_ATOM,
+ M_INTEL_SLM,
M_INTEL_CORE2,
M_INTEL_COREI7,
M_AMDFAM10H,
M_AMDFAM15H,
- M_INTEL_SLM,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
diff --git a/gcc-4.8/gcc/config/i386/i386.h b/gcc-4.8/gcc/config/i386/i386.h
index ebf62a557..6c1c33466 100644
--- a/gcc-4.8/gcc/config/i386/i386.h
+++ b/gcc-4.8/gcc/config/i386/i386.h
@@ -332,7 +332,6 @@ enum ix86_tune_indices {
X86_TUNE_REASSOC_FP_TO_PARALLEL,
X86_TUNE_GENERAL_REGS_SSE_SPILL,
X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
- X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
X86_TUNE_LAST
};
@@ -440,8 +439,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
-#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
- ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc-4.8/gcc/config/i386/i386.md b/gcc-4.8/gcc/config/i386/i386.md
index 2d28d694c..6dcad7e02 100644
--- a/gcc-4.8/gcc/config/i386/i386.md
+++ b/gcc-4.8/gcc/config/i386/i386.md
@@ -3945,18 +3945,6 @@
CONST0_RTX (V4SFmode), operands[1]));
})
-;; It's more profitable to split and then extend in the same register.
-(define_peephole2
- [(set (match_operand:DF 0 "register_operand")
- (float_extend:DF
- (match_operand:SF 1 "memory_operand")))]
- "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
- && optimize_insn_for_speed_p ()
- && SSE_REG_P (operands[0])"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0) (float_extend:DF (match_dup 2)))]
- "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
-
(define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
(float_extend:DF
@@ -4098,18 +4086,6 @@
CONST0_RTX (V2DFmode), operands[1]));
})
-;; It's more profitable to split and then extend in the same register.
-(define_peephole2
- [(set (match_operand:SF 0 "register_operand")
- (float_truncate:SF
- (match_operand:DF 1 "memory_operand")))]
- "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
- && optimize_insn_for_speed_p ()
- && SSE_REG_P (operands[0])"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
- "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
-
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
(float_truncate:SF (match_operand:DF 1)))
diff --git a/gcc-4.8/libgcc/config/i386/cpuinfo.c b/gcc-4.8/libgcc/config/i386/cpuinfo.c
index 1c744f123..f32ec17aa 100644
--- a/gcc-4.8/libgcc/config/i386/cpuinfo.c
+++ b/gcc-4.8/libgcc/config/i386/cpuinfo.c
@@ -52,16 +52,14 @@ enum processor_vendor
VENDOR_MAX
};
-/* Any new types or subtypes have to be inserted at the end. */
-
enum processor_types
{
INTEL_ATOM = 1,
+ INTEL_SLM,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
- INTEL_SLM,
CPU_TYPE_MAX
};