From 27f3b6e52df39b2bf4038d7fb92e1c642c4913dc Mon Sep 17 00:00:00 2001 From: Pavel Chupin Date: Thu, 30 May 2013 15:35:10 +0400 Subject: [4.8, REAPPLY] Release basic tuning for new Silvermont architecture Reapplying https://android-review.googlesource.com/#/c/59726 after 4.8.3 merge Change-Id: I855de6c963d423f68899f90aada1758ae6f6c0d9 Signed-off-by: Pavel Chupin --- gcc-4.8/gcc/config.gcc | 22 +- gcc-4.8/gcc/config/i386/driver-i386.c | 10 +- gcc-4.8/gcc/config/i386/i386-c.c | 7 + gcc-4.8/gcc/config/i386/i386.c | 127 +++++- gcc-4.8/gcc/config/i386/i386.h | 2 + gcc-4.8/gcc/config/i386/i386.md | 3 +- gcc-4.8/gcc/config/i386/slm.md | 758 ++++++++++++++++++++++++++++++++++ gcc-4.8/libgcc/config/i386/cpuinfo.c | 1 + 8 files changed, 898 insertions(+), 32 deletions(-) create mode 100644 gcc-4.8/gcc/config/i386/slm.md diff --git a/gcc-4.8/gcc/config.gcc b/gcc-4.8/gcc/config.gcc index 7ae195e44..80030f3c6 100644 --- a/gcc-4.8/gcc/config.gcc +++ b/gcc-4.8/gcc/config.gcc @@ -1289,7 +1289,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` need_64bit_isa=yes case X"${with_cpu}" in - Xgeneric|Xatom|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1298,7 +1298,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic 
atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -1410,7 +1410,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) tmake_file="$tmake_file i386/t-sol2-64" need_64bit_isa=yes case X"${with_cpu}" in - Xgeneric|Xatom|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1419,7 +1419,7 @@ i[34567]86-*-solaris2* | x86_64-*-solaris2.1[0-9]*) ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -1476,7 +1476,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) if test x$enable_targets = xall; then tm_defines="${tm_defines} TARGET_BI_ARCH=1" case X"${with_cpu}" in - Xgeneric|Xatom|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) + Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3) ;; X) if test x$with_cpu_64 = x; then @@ -1485,7 +1485,7 @@ i[34567]86-*-mingw* | x86_64-*-mingw*) ;; *) echo "Unsupported 
CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic atom core2 corei7 Xcorei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 + echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2 exit 1 ;; esac @@ -2863,6 +2863,10 @@ case ${target} in arch=atom cpu=atom ;; + slm-*) + arch=slm + cpu=slm + ;; core2-*) arch=core2 cpu=core2 @@ -2934,6 +2938,10 @@ case ${target} in arch=atom cpu=atom ;; + slm-*) + arch=slm + cpu=slm + ;; core2-*) arch=core2 cpu=core2 @@ -3449,7 +3457,7 @@ case "${target}" in | k8 | k8-sse3 | athlon64 | athlon64-sse3 | opteron \ | opteron-sse3 | athlon-fx | bdver3 | bdver2 | bdver1 | btver2 \ | btver1 | amdfam10 | barcelona | nocona | core2 | corei7 \ - | corei7-avx | core-avx-i | core-avx2 | atom) + | corei7-avx | core-avx-i | core-avx2 | atom | slm) # OK ;; *) diff --git a/gcc-4.8/gcc/config/i386/driver-i386.c b/gcc-4.8/gcc/config/i386/driver-i386.c index 148fbc227..2f4c1c13d 100644 --- a/gcc-4.8/gcc/config/i386/driver-i386.c +++ b/gcc-4.8/gcc/config/i386/driver-i386.c @@ -702,8 +702,14 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Sandy Bridge. */ cpu = "corei7-avx"; else if (has_sse4_2) - /* Assume Core i7. */ - cpu = "corei7"; + { + if (has_movbe) + /* Assume SLM. */ + cpu = "slm"; + else + /* Assume Core i7. 
*/ + cpu = "corei7"; + } else if (has_ssse3) { if (has_movbe) diff --git a/gcc-4.8/gcc/config/i386/i386-c.c b/gcc-4.8/gcc/config/i386/i386-c.c index e5630af7e..932e3d2eb 100644 --- a/gcc-4.8/gcc/config/i386/i386-c.c +++ b/gcc-4.8/gcc/config/i386/i386-c.c @@ -149,6 +149,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__atom"); def_or_undef (parse_in, "__atom__"); break; + case PROCESSOR_SLM: + def_or_undef (parse_in, "__slm"); + def_or_undef (parse_in, "__slm__"); + break; /* use PROCESSOR_max to not set/unset the arch macro. */ case PROCESSOR_max: break; @@ -241,6 +245,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_ATOM: def_or_undef (parse_in, "__tune_atom__"); break; + case PROCESSOR_SLM: + def_or_undef (parse_in, "__tune_slm__"); + break; case PROCESSOR_GENERIC32: case PROCESSOR_GENERIC64: break; diff --git a/gcc-4.8/gcc/config/i386/i386.c b/gcc-4.8/gcc/config/i386/i386.c index 0569828f3..99d1eb8e7 100644 --- a/gcc-4.8/gcc/config/i386/i386.c +++ b/gcc-4.8/gcc/config/i386/i386.c @@ -1480,6 +1480,79 @@ struct processor_costs atom_cost = { 1, /* cond_not_taken_branch_cost. 
*/ }; +static const +struct processor_costs slm_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 4, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. 
*/ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, + {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, + {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, + {libcall, {{24, loop, false}, {32, unrolled_loop, false}, + {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. 
*/ static const struct processor_costs generic64_cost = { @@ -1733,6 +1806,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_HASWELL (1<= 0) break; } @@ -29650,6 +29731,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMD, M_CPU_TYPE_START, M_INTEL_ATOM, + M_INTEL_SLM, M_INTEL_CORE2, M_INTEL_COREI7, M_AMDFAM10H, @@ -29676,6 +29758,7 @@ fold_builtin_cpu (tree fndecl, tree *args) {"amd", M_AMD}, {"intel", M_INTEL}, {"atom", M_INTEL_ATOM}, + {"slm", M_INTEL_SLM}, {"core2", M_INTEL_CORE2}, {"corei7", M_INTEL_COREI7}, {"nehalem", M_INTEL_COREI7_NEHALEM}, diff --git a/gcc-4.8/gcc/config/i386/i386.h b/gcc-4.8/gcc/config/i386/i386.h index 3810be048..eca7c4bc8 100644 --- a/gcc-4.8/gcc/config/i386/i386.h +++ b/gcc-4.8/gcc/config/i386/i386.h @@ -257,6 +257,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1) #define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2) #define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) +#define TARGET_SLM (ix86_tune == PROCESSOR_SLM) /* Feature tests against the various tunings. */ enum ix86_tune_indices { @@ -2075,6 +2076,7 @@ enum processor_type PROCESSOR_COREI7, PROCESSOR_HASWELL, PROCESSOR_ATOM, + PROCESSOR_SLM, PROCESSOR_GEODE, PROCESSOR_K6, PROCESSOR_ATHLON, diff --git a/gcc-4.8/gcc/config/i386/i386.md b/gcc-4.8/gcc/config/i386/i386.md index 3b5bf884d..b390df447 100644 --- a/gcc-4.8/gcc/config/i386/i386.md +++ b/gcc-4.8/gcc/config/i386/i386.md @@ -325,7 +325,7 @@ ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7, - atom,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" + atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. 
Refinements due to arguments to be @@ -954,6 +954,7 @@ (include "btver2.md") (include "geode.md") (include "atom.md") +(include "slm.md") (include "core2.md") diff --git a/gcc-4.8/gcc/config/i386/slm.md b/gcc-4.8/gcc/config/i386/slm.md new file mode 100644 index 000000000..3ac919e37 --- /dev/null +++ b/gcc-4.8/gcc/config/i386/slm.md @@ -0,0 +1,758 @@ +;; Silvermont (SLM) Scheduling +;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; Silvermont has 2 out-of-order IEC, 2 in-order FEC and 1 in-order MEC. 
+ +;; SLM has two ports: port 0 and port 1 connecting to all execution units +(define_cpu_unit "slm-port-0,slm-port-1" "slm") + +(define_cpu_unit "slm-ieu-0, slm-ieu-1, + slm-imul, slm-feu-0, slm-feu-1" + "slm") + +(define_reservation "slm-all-ieu" "(slm-ieu-0 + slm-ieu-1 + slm-imul)") +(define_reservation "slm-all-feu" "(slm-feu-0 + slm-feu-1)") +(define_reservation "slm-all-eu" "(slm-all-ieu + slm-all-feu)") +(define_reservation "slm-fp-0" "(slm-port-0 + slm-feu-0)") + +;; Some EUs have duplicated copies and can be accessed via either +;; port 0 or port 1 +;; (define_reservation "slm-port-either" "(slm-port-0 | slm-port-1)" +(define_reservation "slm-port-dual" "(slm-port-0 + slm-port-1)") + +;;; fmul insn can have 4 or 5 cycles latency +(define_reservation "slm-fmul-5c" + "(slm-port-0 + slm-feu-0), slm-feu-0, nothing*3") +(define_reservation "slm-fmul-4c" "(slm-port-0 + slm-feu-0), nothing*3") + +;;; fadd can have 3 or 4 cycles latency depending on instruction form +(define_reservation "slm-fadd-3c" "(slm-port-1 + slm-feu-1), nothing*2") +(define_reservation "slm-fadd-4c" + "(slm-port-1 + slm-feu-1), slm-feu-1, nothing*2") + +;;; imul insn has 3 cycles latency for SI operands +(define_reservation "slm-imul-32" + "(slm-port-1 + slm-imul), nothing*2") +(define_reservation "slm-imul-mem-32" + "(slm-port-1 + slm-imul + slm-port-0), nothing*2") +;;; imul has 4 cycles latency for DI operands with 1/2 tput +(define_reservation "slm-imul-64" + "(slm-port-1 + slm-imul), slm-imul, nothing*2") + +;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on +;;; instruction forms +(define_reservation "slm-dual-1c" "(slm-port-dual + slm-all-eu)") +(define_reservation "slm-dual-2c" + "(slm-port-dual + slm-all-eu, nothing)") + +;;; Most simple ALU instructions have 1 cycle latency. Some of them +;;; issue in port 0, some in port 1 and some in either port. 
+(define_reservation "slm-simple-0" "(slm-port-0 + slm-ieu-0)") +(define_reservation "slm-simple-1" "(slm-port-1 + slm-ieu-1)") +(define_reservation "slm-simple-either" "(slm-simple-0 | slm-simple-1)") + +;;; Complex macro-instruction has variants of latency, and uses both ports. +(define_reservation "slm-complex" "(slm-port-dual + slm-all-eu)") + +(define_insn_reservation "slm_other" 9 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "!jeu"))) + "slm-complex, slm-all-eu*8") + +;; return has type "other" with atom_unit "jeu" +(define_insn_reservation "slm_other_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "jeu"))) + "slm-dual-1c") + +(define_insn_reservation "slm_multi" 9 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "multi")) + "slm-complex, slm-all-eu*8") + +;; Normal alu insns without carry +(define_insn_reservation "slm_alu" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "0")))) + "slm-simple-either") + +;; Normal alu insns without carry, but use MEC. 
+(define_insn_reservation "slm_alu_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "0")))) + "slm-simple-either") + +;; Alu insn consuming CF, such as adc/sbb +(define_insn_reservation "slm_alu_carry" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "1")))) + "slm-simple-either, nothing") + +;; Alu insn consuming CF, such as adc/sbb, with a memory operand +(define_insn_reservation "slm_alu_carry_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "1")))) + "slm-simple-either, nothing") + +(define_insn_reservation "slm_alu1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))) + "slm-simple-either") + +;; bsf and bsr insns +(define_insn_reservation "slm_alu1_1" 10 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none") (eq_attr "prefix_0f" "1"))) + "slm-simple-1, slm-ieu-1*9") + +(define_insn_reservation "slm_alu1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_negnot" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_negnot_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_imov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_imov_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imov") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +;; 16<-16, 32<-32 +(define_insn_reservation "slm_imovx" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr 
"memory" "none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "slm-simple-either") + +;; 16<-16, 32<-32, mem +(define_insn_reservation "slm_imovx_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "slm-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 +(define_insn_reservation "slm_imovx_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "slm-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem +(define_insn_reservation "slm_imovx_2_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "slm-simple-0") + +;; 16<-8 +(define_insn_reservation "slm_imovx_3" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imovx") + (and (match_operand:HI 0 "register_operand") + (match_operand:QI 1 "general_operand")))) + "slm-simple-0, nothing*2") + +(define_insn_reservation "slm_lea" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "lea") + (eq_attr "mode" "!HI"))) + "slm-simple-either") + +;; lea 16bit address is complex insn +(define_insn_reservation "slm_lea_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "lea") + (eq_attr "mode" "HI"))) + "slm-complex, slm-all-eu") + 
+(define_insn_reservation "slm_incdec" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_incdec_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "!none"))) + "slm-simple-0, nothing*2") + +;; simple shift instruction use SHIFT eu, none memory +(define_insn_reservation "slm_ishift" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) + "slm-simple-0") + +;; simple shift instruction use SHIFT eu, memory +(define_insn_reservation "slm_ishift_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) + "slm-simple-0") + +;; DF shift (prefixed with 0f) is complex insn with latency of 4 cycles +(define_insn_reservation "slm_ishift_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift") + (eq_attr "prefix_0f" "1"))) + "slm-complex, slm-all-eu*3") + +(define_insn_reservation "slm_ishift1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_ishift1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_rotate1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + 
+(define_insn_reservation "slm_imul" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) + "slm-imul-32") + +(define_insn_reservation "slm_imul_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "!none") (eq_attr "mode" "SI")))) + "slm-imul-mem-32") + +;; latency set to 4 as common 64x64 imul with 1/2 tput +(define_insn_reservation "slm_imul_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "imul") + (eq_attr "mode" "!SI"))) + "slm-imul-64") + +(define_insn_reservation "slm_idiv" 33 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "idiv")) + "slm-complex, slm-all-eu*16, nothing*16") + +(define_insn_reservation "slm_icmp" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_icmp_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_test" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "test") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_test_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "test") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_ibr" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "!load"))) + "slm-simple-1") + +;; complex if jump target is from address +(define_insn_reservation "slm_ibr_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_setcc" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "!store"))) + "slm-simple-either") + +;; 2 cycles complex if target is in memory +(define_insn_reservation "slm_setcc_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "setcc") + (eq_attr 
"memory" "store"))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_icmov" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "none"))) + "slm-simple-either, nothing") + +(define_insn_reservation "slm_icmov_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "!none"))) + "slm-simple-0, nothing") + +;; UCODE if segreg, ignored +(define_insn_reservation "slm_push" 2 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "push")) + "slm-dual-2c") + +;; pop r64 is 1 cycle. UCODE if segreg, ignored +(define_insn_reservation "slm_pop" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "pop") + (eq_attr "mode" "DI"))) + "slm-dual-1c") + +;; pop non-r64 is 2 cycles. UCODE if segreg, ignored +(define_insn_reservation "slm_pop_2" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "pop") + (eq_attr "mode" "!DI"))) + "slm-dual-2c") + +;; UCODE if segreg, ignored +(define_insn_reservation "slm_call" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "call")) + "slm-dual-1c") + +(define_insn_reservation "slm_callv" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "callv")) + "slm-dual-1c") + +(define_insn_reservation "slm_leave" 3 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "leave")) + "slm-complex, slm-all-eu*2") + +(define_insn_reservation "slm_str" 3 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "str")) + "slm-complex, slm-all-eu*2") + +(define_insn_reservation "slm_sselog" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "none"))) + "slm-simple-either") + +(define_insn_reservation "slm_sselog_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +(define_insn_reservation "slm_sselog1" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "none"))) + "slm-simple-0") + +(define_insn_reservation "slm_sselog1_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr 
"type" "sselog1") + (eq_attr "memory" "!none"))) + "slm-simple-0") + +;; not pmad, not psad +(define_insn_reservation "slm_sseiadd" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "!simul") + (eq_attr "atom_unit" "!complex"))))) + "slm-simple-either") + +;; pmad, psad and 64 +(define_insn_reservation "slm_sseiadd_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "DI"))))) + "slm-fmul-4c") + +;; pmad, psad and 128 +(define_insn_reservation "slm_sseiadd_3" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "TI"))))) + "slm-fmul-5c") + +;; if paddq(64 bit op), phadd/phsub +(define_insn_reservation "slm_sseiadd_4" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseiadd") + (ior (match_operand:V2DI 0 "register_operand") + (eq_attr "atom_unit" "complex")))) + "slm-fadd-4c") + +;; if immediate op. 
+(define_insn_reservation "slm_sseishft" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "!sishuf") + (match_operand 2 "immediate_operand")))) + "slm-simple-either") + +;; if palignr or psrldq +(define_insn_reservation "slm_sseishft_2" 1 + (and (eq_attr "cpu" "slm") + (ior (eq_attr "type" "sseishft1") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "sishuf") + (match_operand 2 "immediate_operand"))))) + "slm-simple-0") + +;; if reg/mem op +(define_insn_reservation "slm_sseishft_3" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseishft") + (not (match_operand 2 "immediate_operand")))) + "slm-complex, slm-all-eu") + +(define_insn_reservation "slm_sseimul" 5 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "sseimul")) + "slm-fmul-5c") + +;; rcpss or rsqrtss +(define_insn_reservation "slm_sse" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) + "slm-fmul-4c") + +;; movshdup, movsldup. 
Suggest to type sseishft +(define_insn_reservation "slm_sse_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "movdup"))) + "slm-simple-0") + +;; lfence +(define_insn_reservation "slm_sse_3" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "lfence"))) + "slm-simple-either") + +;; sfence,clflush,mfence, prefetch +(define_insn_reservation "slm_sse_4" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (ior (eq_attr "atom_sse_attr" "fence") + (eq_attr "atom_sse_attr" "prefetch")))) + "slm-simple-0") + +;; rcpps, rsqrtss, sqrt, ldmxcsr +(define_insn_reservation "slm_sse_5" 9 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sse") + (ior (ior (eq_attr "atom_sse_attr" "sqrt") + (eq_attr "atom_sse_attr" "mxcsr")) + (and (eq_attr "atom_sse_attr" "rcp") + (eq_attr "mode" "V4SF"))))) + "slm-complex, slm-all-eu*7, nothing") + +;; xmm->xmm +(define_insn_reservation "slm_ssemov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") + (match_operand 1 "register_operand" "xy")))) + "slm-simple-either") + +;; reg->xmm +(define_insn_reservation "slm_ssemov_2" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") + (match_operand 1 "register_operand" "r")))) + "slm-simple-0") + +;; xmm->reg +(define_insn_reservation "slm_ssemov_3" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "r") + (match_operand 1 "register_operand" "xy")))) + "slm-simple-0, nothing*2") + +;; mov mem +(define_insn_reservation "slm_ssemov_4" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) + "slm-simple-0") + +;; movu mem +(define_insn_reservation "slm_ssemov_5" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemov") + (ior (eq_attr "movu" "1") (eq_attr "memory" 
"!none")))) + "slm-simple-0, nothing") + +;; no memory simple +(define_insn_reservation "slm_sseadd" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "slm-fadd-3c") + +;; memory simple +(define_insn_reservation "slm_sseadd_mem" 3 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "!none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "slm-fadd-3c") + +;; maxps, minps, *pd, hadd, hsub +(define_insn_reservation "slm_sseadd_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseadd") + (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex")))) + "slm-fadd-4c") + +;; Except dppd/dpps +(define_insn_reservation "slm_ssemul" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "!SF"))) + "slm-fmul-5c") + +;; Except dppd/dpps, 4 cycle if mulss +(define_insn_reservation "slm_ssemul_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF"))) + "slm-fmul-4c") + +(define_insn_reservation "slm_ssecmp" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssecmp")) + "slm-simple-either") + +(define_insn_reservation "slm_ssecomi" 1 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssecomi")) + "slm-simple-0") + +;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "slm_ssecvt" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "register_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "register_operand"))))) + "slm-fp-0, slm-feu-0, nothing*3") + +;; memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "slm_ssecvt_mem" 5 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "memory_operand")) 
+ (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand"))))) +"slm-fp-0, slm-feu-0, nothing*3") + +;; cvtpd2pi, cvtpi2pd +(define_insn_reservation "slm_ssecvt_1" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2DF 0 "register_operand") + (match_operand:V2SI 1 "register_operand")) + (and (match_operand:V2SI 0 "register_operand") + (match_operand:V2DF 1 "register_operand"))))) + "slm-fp-0, slm-feu-0") + +;; memory and cvtpd2pi, cvtpi2pd +(define_insn_reservation "slm_ssecvt_1_mem" 2 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2DF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand")) + (and (match_operand:V2SI 0 "register_operand") + (match_operand:V2DF 1 "memory_operand"))))) + "slm-fp-0, slm-feu-0") + +;; otherwise. 4 cycles average for cvtss2sd +(define_insn_reservation "slm_ssecvt_3" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "ssecvt") + (not (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "nonimmediate_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "nonimmediate_operand")))))) + "slm-fp-0, nothing*3") + +;; memory and cvtsi2sd +(define_insn_reservation "slm_sseicvt" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseicvt") + (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "nonimmediate_operand")))) + "slm-fp-0") + +;; otherwise. 
8 cycles average for cvtsd2si +(define_insn_reservation "slm_sseicvt_2" 4 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "sseicvt") + (not (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand"))))) + "slm-fp-0, nothing*3") + +(define_insn_reservation "slm_ssediv" 13 + (and (eq_attr "cpu" "slm") + (eq_attr "type" "ssediv")) + "slm-fp-0, slm-feu-0*10, nothing*2") + +;; simple for fmov +(define_insn_reservation "slm_fmov" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none"))) + "slm-simple-either") + +;; simple for fmov +(define_insn_reservation "slm_fmov_mem" 1 + (and (eq_attr "cpu" "slm") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "!none"))) + "slm-simple-either") + +;; Define bypass here + +;; There will be 0 cycle stall from cmp/test to jcc + +;; There will be 1 cycle stall from flag producer to cmov and adc/sbb +(define_bypass 2 "slm_icmp, slm_test, slm_alu, slm_alu_carry, + slm_alu1, slm_negnot, slm_incdec, slm_ishift, + slm_ishift1, slm_rotate, slm_rotate1" + "slm_icmov, slm_alu_carry") + +;; lea to shift source stall is 1 cycle +(define_bypass 2 "slm_lea" + "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1" + "!ix86_dep_by_shift_count") + +;; non-lea to shift count stall is 1 cycle +(define_bypass 2 "slm_alu_carry, + slm_alu,slm_alu1,slm_negnot,slm_imov,slm_imovx, + slm_incdec,slm_ishift,slm_ishift1,slm_rotate, + slm_rotate1, slm_setcc, slm_icmov, slm_pop, + slm_alu_mem, slm_alu_carry_mem, slm_alu1_mem, + slm_imovx_mem, slm_imovx_2_mem, + slm_imov_mem, slm_icmov_mem, slm_fmov_mem" + "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1, + slm_ishift_mem, slm_ishift1_mem, + slm_rotate_mem, slm_rotate1_mem" + "ix86_dep_by_shift_count") diff --git a/gcc-4.8/libgcc/config/i386/cpuinfo.c b/gcc-4.8/libgcc/config/i386/cpuinfo.c index fa1545a6a..f32ec17aa 100644 --- a/gcc-4.8/libgcc/config/i386/cpuinfo.c +++ b/gcc-4.8/libgcc/config/i386/cpuinfo.c @@ -55,6 +55,7 @@ enum processor_vendor enum 
processor_types { INTEL_ATOM = 1, + INTEL_SLM, INTEL_CORE2, INTEL_COREI7, AMDFAM10H, -- cgit v1.2.3