Diffstat (limited to 'gcc-4.2.1-5666.3/gcc/config/i386/i386.c')
-rw-r--r-- gcc-4.2.1-5666.3/gcc/config/i386/i386.c | 23515
1 file changed, 23515 insertions(+), 0 deletions(-)
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.c b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c
new file mode 100644
index 000000000..0e212967a
--- /dev/null
+++ b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c
@@ -0,0 +1,23515 @@
+/* Subroutines used for code generation on IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "real.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-codes.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "except.h"
+#include "function.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "cgraph.h"
+#include "tree-gimple.h"
+#include "dwarf2.h"
+#include "tm-constrs.h"
+
+/* APPLE LOCAL begin pascal strings */
+#include "../../libcpp/internal.h"
+extern struct cpp_reader* parse_in;
+/* APPLE LOCAL end pascal strings */
+/* APPLE LOCAL begin regparmandstackparm */
+#include "integrate.h"
+#include "tree-inline.h"
+#include "splay-tree.h"
+#include "tree-pass.h"
+#include "c-tree.h"
+#include "c-common.h"
+/* APPLE LOCAL end regparmandstackparm */
+/* APPLE LOCAL begin dwarf call/pop 5221468 */
+#include "debug.h"
+#include "dwarf2out.h"
+/* APPLE LOCAL end dwarf call/pop 5221468 */
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT (-1)
+#endif
+
+/* Return index of given mode in mult and division cost tables. */
+#define MODE_INDEX(mode) \
+ ((mode) == QImode ? 0 \
+ : (mode) == HImode ? 1 \
+ : (mode) == SImode ? 2 \
+ : (mode) == DImode ? 3 \
+ : 4)
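+
+/* For illustration only (assuming the mult_init and divide arrays declared
+ for struct processor_costs in i386.h), a cost lookup typically reads
+
+ ix86_cost->mult_init[MODE_INDEX (SImode)] -- SImode multiply start
+ ix86_cost->divide[MODE_INDEX (DImode)] -- DImode divide/mod
+
+ so SImode selects index 2 and DImode index 3 in the tables below, and
+ any other mode falls through to the "other" slot at index 4. */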
+
+/* Processor costs (relative to an add) */
+/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
+#define COSTS_N_BYTES(N) ((N) * 2)
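+
+/* A sanity check of the scale, under the assumption above that
+ COSTS_N_INSNS (N) == (N) * 4: COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
+ so when optimizing for size a 2-byte addition carries the same weight as
+ one generic insn, and the size table below stays comparable with the
+ speed tables that follow. */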
+
+static const
+struct processor_costs size_cost = { /* costs for tuning for size */
+ COSTS_N_BYTES (2), /* cost of an add instruction */
+ COSTS_N_BYTES (3), /* cost of a lea instruction */
+ COSTS_N_BYTES (2), /* variable shift costs */
+ COSTS_N_BYTES (3), /* constant shift costs */
+ {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* cost of movsx */
+ COSTS_N_BYTES (3), /* cost of movzx */
+ 0, /* "large" insn */
+ 2, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, /* cost of moving SSE register */
+ {3, 3, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {3, 3, 3}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_BYTES (2), /* cost of FMUL instruction. */
+ COSTS_N_BYTES (2), /* cost of FDIV instruction. */
+ COSTS_N_BYTES (2), /* cost of FABS instruction. */
+ COSTS_N_BYTES (2), /* cost of FCHS instruction. */
+ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
+};
+
+/* Processor costs (relative to an add) */
+static const
+struct processor_costs i386_cost = { /* 386 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (1), /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (27), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (88), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (22), /* cost of FABS instruction. */
+ COSTS_N_INSNS (24), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs i486_cost = { /* 486 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
+ 1, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (16), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (73), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs pentium_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs pentiumpro_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 32, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs k6_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (2), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 6, /* MMX or SSE register to integer */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs athlon_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (24), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs k8_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs pentium4_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 12, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 10, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (43), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
+};
+
+static const
+struct processor_costs nocona_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 3, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 6, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {12, 12, 12}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 8, /* MMX or SSE register to integer */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
+};
+/* APPLE LOCAL begin mainline */
+static const
+struct processor_costs core2_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (22), /* HI */
+ COSTS_N_INSNS (22), /* SI */
+ COSTS_N_INSNS (22), /* DI */
+ COSTS_N_INSNS (22)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 16, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {6, 6, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (32), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
+};
+/* APPLE LOCAL end mainline */
+/* Generic64 should produce code tuned for Nocona and K8. */
+static const
+struct processor_costs generic64_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ /* On all chips taken into consideration, lea takes 2 cycles or more.
+ With that cost, however, our current implementation of synth_mult
+ results in the use of unnecessary temporary registers, causing a
+ regression on several SPECfp benchmarks. */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
+ value is increased to the perhaps more appropriate value of 5. */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+};
+
+/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
+static const
+struct processor_costs generic32_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+};
+
+const struct processor_costs *ix86_cost = &pentium_cost;
+
+/* Processor feature/optimization bitmasks. */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+/* APPLE LOCAL mainline */
+#define m_CORE2 (1<<PROCESSOR_CORE2)
+#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
+#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
+#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
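+
+/* An illustrative sketch of how these masks are consumed (the actual
+ TUNEMASK macro lives in the headers); a tuning test is roughly
+
+ if (x86_use_leave & (1 << ix86_tune))
+ ... emit leave in the epilogue ...
+
+ so m_GENERIC below simply turns a feature on for both generic32 and
+ generic64 tuning at once. */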
+
+/* Generic instruction choice should be a common subset of the supported
+ CPUs (PPro/PENT4/NOCONA/Athlon/K8). */
+
+/* Using leave does not affect Nocona SPEC2000 results negatively, so
+ enabling it for Generic64 seems like a good code-size tradeoff. We can't
+ enable it for 32bit generic because it does not work well with PPro-based
+ chips. */
+/* APPLE LOCAL begin mainline */
+const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
+const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_zero_extend_with_and = m_486 | m_PENT;
+const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC /* m_386 | m_K6 */;
+const int x86_double_with_add = ~m_386;
+const int x86_use_bit_test = m_386;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
+const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
+const int x86_3dnow_a = m_ATHLON_K8;
+/* APPLE LOCAL end mainline */
+const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
+/* Branch hints were put in P4 based on simulation results. But after P4
+ was made, no performance benefit was observed with branch hints; they
+ also increase code size. As a result, icc never generates branch
+ hints. */
+const int x86_branch_hints = 0;
+const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
+/* We probably ought to watch for partial register stalls on the Generic32
+ compilation setting as well. However, in the current implementation
+ partial register stalls are not eliminated very well: they can be
+ introduced via subregs synthesized by combine and can happen in
+ caller/callee saving sequences.
+ Because this option pays back little on PPro-based chips and conflicts
+ with the partial register dependencies used by Athlon/P4-based chips, it
+ is better to leave it off for generic32 for now. */
+const int x86_partial_reg_stall = m_PPRO;
+/* APPLE LOCAL begin mainline */
+const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
+const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
+const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
+const int x86_use_mov0 = m_K6;
+const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
+const int x86_read_modify_write = ~m_PENT;
+const int x86_read_modify = ~(m_PENT | m_PPRO);
+const int x86_split_long_moves = m_PPRO;
+const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
+const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
+const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
+const int x86_qimode_math = ~(0);
+const int x86_promote_qi_regs = 0;
+/* On PPro this flag is meant to avoid partial register stalls. Just like
+ x86_partial_reg_stall, this option might be considered for Generic32 if
+ our scheme for avoiding partial stalls were more effective. */
+const int x86_himode_math = ~(m_PPRO);
+const int x86_promote_hi_regs = m_PPRO;
+const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC);
+const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_shift1 = ~m_486;
+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+/* In the Generic model we have a conflict here between PPro/Pentium4-based
+ chips that treat 128bit SSE registers as single units and K8-based chips
+ that divide SSE registers into two 64bit halves.
+ x86_sse_partial_reg_dependency promotes all store destinations to be
+ 128bit to allow register renaming on 128bit SSE units, but it usually
+ results in one extra microop on 64bit SSE units. Experimental results
+ show that disabling this option on P4 brings over a 20% SPECfp
+ regression, while enabling it on K8 brings roughly a 2.4% regression that
+ can be partly masked by careful scheduling of moves. */
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
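+/* Concretely (illustrative asm, not emitted verbatim here): reg-reg
+ "movss %xmm1, %xmm0" and conversions such as cvtsi2ss write only the low
+ bits and merge with the old upper bits, creating a false dependency on
+ the previous contents of %xmm0 on 128bit units; preferring full-register
+ moves breaks that dependency at the price of an extra microop on chips
+ that split the register into halves. */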
+/* Set for machines where the type and dependencies are resolved on SSE
+ register parts instead of whole registers, so we may maintain just the
+ lower part of scalar values in the proper format, leaving the upper part
+ undefined. */
+const int x86_sse_split_regs = m_ATHLON_K8;
+const int x86_sse_typeless_stores = m_ATHLON_K8;
+const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
+const int x86_use_ffreep = m_ATHLON_K8;
+const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
+const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
+
+/* ??? Allowing interunit moves makes it all too easy for the compiler to
+ put integer data in xmm registers, which results in pretty abysmal
+ code. */
+/* APPLE LOCAL 5612787 mainline sse4 */
+const int x86_inter_unit_moves = ~(m_ATHLON_K8);
+
+const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
+/* Some CPU cores are not able to predict more than 4 branch instructions in
+ the 16 byte window. */
+const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_CORE2 | m_GENERIC;
+const int x86_use_bt = m_ATHLON_K8;
+/* APPLE LOCAL begin */
+/* See comment in darwin override options for what needs fixing.
+ Most of this code has been rewritten in mainline anyhow.
+ All we've done here is remove the const since we assign to
+ them in SUBTARGET_OVERRIDE_OPTIONS. */
+/* Compare and exchange was added for 80486. */
+int x86_cmpxchg = ~m_386;
+/* Compare and exchange 8 bytes was added for pentium. */
+int x86_cmpxchg8b = ~(m_386 | m_486);
+/* Compare and exchange 16 bytes was added for nocona. */
+/* APPLE LOCAL mainline */
+int x86_cmpxchg16b = m_NOCONA | m_CORE2;
+/* Exchange and add was added for 80486. */
+int x86_xadd = ~m_386;
+/* APPLE LOCAL begin mainline bswap */
+/* Byteswap was added for 80486. */
+int x86_bswap = ~m_386;
+/* APPLE LOCAL end mainline bswap */
+/* APPLE LOCAL end */
+const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
+/* APPLE LOCAL end mainline */
+
+/* If the average insn count for a single function invocation is lower
+ than this constant, emit fast (but longer) prologue and epilogue
+ code. */
+#define FAST_PROLOGUE_INSN_COUNT 20
+
+/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
+static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
+static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
+static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
+
+/* Array of the smallest class containing reg number REGNO, indexed by
+ REGNO. Used by REGNO_REG_CLASS in i386.h. */
+
+enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
+{
+ /* ax, dx, cx, bx */
+ AREG, DREG, CREG, BREG,
+ /* si, di, bp, sp */
+ SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
+ /* FP registers */
+ FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
+ FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
+ /* arg pointer */
+ NON_Q_REGS,
+ /* flags, fpsr, dirflag, frame */
+ NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
+ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+ MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
+ MMX_REGS, MMX_REGS,
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+};
+
+/* The "default" register map used in 32bit mode. */
+
+int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
+ 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
+
+static int const x86_64_int_parameter_registers[6] =
+{
+ 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
+ FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
+};
+
+static int const x86_64_int_return_registers[4] =
+{
+ 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
+};
+
+/* The "default" register map used in 64bit mode. */
+int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
+ 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
+ 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
+ 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
+ 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
+ 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
+};
+
+/* Define the register numbers to be used in Dwarf debugging information.
+ The SVR4 reference port C compiler uses the following register numbers
+ in its Dwarf output code:
+ 0 for %eax (gcc regno = 0)
+ 1 for %ecx (gcc regno = 2)
+ 2 for %edx (gcc regno = 1)
+ 3 for %ebx (gcc regno = 3)
+ 4 for %esp (gcc regno = 7)
+ 5 for %ebp (gcc regno = 6)
+ 6 for %esi (gcc regno = 4)
+ 7 for %edi (gcc regno = 5)
+ The following three DWARF register numbers are never generated by
+ the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
+ believes these numbers have these meanings.
+ 8 for %eip (no gcc equivalent)
+ 9 for %eflags (gcc regno = 17)
+ 10 for %trapno (no gcc equivalent)
+ It is not at all clear how we should number the FP stack registers
+ for the x86 architecture. If the version of SDB on x86/svr4 were
+ a bit less brain dead with respect to floating-point then we would
+ have a precedent to follow with respect to DWARF register numbers
+ for x86 FP registers, but the SDB on x86/svr4 is so completely
+ broken with respect to FP registers that it is hardly worth thinking
+ of it as something to strive for compatibility with.
+ The version of x86/svr4 SDB I have at the moment does (partially)
+ seem to believe that DWARF register number 11 is associated with
+ the x86 register %st(0), but that's about all. Higher DWARF
+ register numbers don't seem to be associated with anything in
+ particular, and even for DWARF regno 11, SDB only seems to under-
+ stand that it should say that a variable lives in %st(0) (when
+ asked via an `=' command) if we said it was in DWARF regno 11,
+ but SDB still prints garbage when asked for the value of the
+ variable in question (via a `/' command).
+ (Also note that the labels SDB prints for various FP stack regs
+ when doing an `x' command are all wrong.)
+ Note that these problems generally don't affect the native SVR4
+ C compiler because it doesn't allow the use of -O with -g and
+ because when it is *not* optimizing, it allocates a memory
+ location for each floating-point variable, and the memory
+ location is what gets described in the DWARF AT_location
+ attribute for the variable in question.
+ Regardless of the severe mental illness of the x86/svr4 SDB, we
+ do something sensible here and we use the following DWARF
+ register numbers. Note that these are all stack-top-relative
+ numbers.
+ 11 for %st(0) (gcc regno = 8)
+ 12 for %st(1) (gcc regno = 9)
+ 13 for %st(2) (gcc regno = 10)
+ 14 for %st(3) (gcc regno = 11)
+ 15 for %st(4) (gcc regno = 12)
+ 16 for %st(5) (gcc regno = 13)
+ 17 for %st(6) (gcc regno = 14)
+ 18 for %st(7) (gcc regno = 15)
+*/
+int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
+ 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
+ -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
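+
+/* For example, %ecx is gcc regno 2 and svr4_dbx_register_map[2] == 1,
+ matching the SVR4 numbering listed above; likewise %st(0) (gcc regno 8)
+ maps to DWARF regno 11. */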
+
+/* Test and compare insns in i386.md store the information needed to
+ generate branch and scc insns here. */
+
+rtx ix86_compare_op0 = NULL_RTX;
+rtx ix86_compare_op1 = NULL_RTX;
+rtx ix86_compare_emitted = NULL_RTX;
+
+/* Size of the register save area. */
+#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
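+
+/* With the usual 64bit ABI values (REGPARM_MAX == 6, UNITS_PER_WORD == 8,
+ SSE_REGPARM_MAX == 8 -- an assumption noted here, not a definition),
+ this works out to 6*8 + 8*16 == 176 bytes: six integer registers plus
+ eight 16-byte SSE slots. */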
+
+/* Define the structure for the machine field in struct function. */
+
+struct stack_local_entry GTY(())
+{
+ unsigned short mode;
+ unsigned short n;
+ rtx rtl;
+ struct stack_local_entry *next;
+};
+
+/* Structure describing stack frame layout.
+ Stack grows downward:
+
+ [arguments]
+ <- ARG_POINTER
+ saved pc
+
+ saved frame pointer if frame_pointer_needed
+ <- HARD_FRAME_POINTER
+ [saved regs]
+
+ [padding1] \
+ )
+ [va_arg registers] (
+ > to_allocate <- FRAME_POINTER
+ [frame] (
+ )
+ [padding2] /
+ */
+struct ix86_frame
+{
+ int nregs;
+ int padding1;
+ int va_arg_size;
+ HOST_WIDE_INT frame;
+ int padding2;
+ int outgoing_arguments_size;
+ int red_zone_size;
+
+ HOST_WIDE_INT to_allocate;
+ /* The offsets relative to ARG_POINTER. */
+ HOST_WIDE_INT frame_pointer_offset;
+ HOST_WIDE_INT hard_frame_pointer_offset;
+ HOST_WIDE_INT stack_pointer_offset;
+
+ /* When save_regs_using_mov is set, emit prologue using
+ move instead of push instructions. */
+ bool save_regs_using_mov;
+};
+
+/* Code model option. */
+enum cmodel ix86_cmodel;
+/* Asm dialect. */
+enum asm_dialect ix86_asm_dialect = ASM_ATT;
+/* TLS dialects. */
+enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
+
+/* Which unit we are generating floating point math for. */
+enum fpmath_unit ix86_fpmath;
+
+/* Which CPU we are scheduling for. */
+enum processor_type ix86_tune;
+/* Which instruction set architecture to use. */
+enum processor_type ix86_arch;
+
+/* True if the SSE prefetch instruction is not a NOP. */
+int x86_prefetch_sse;
+
+/* ix86_regparm_string as a number */
+static int ix86_regparm;
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* True if SSE population count insn supported. */
+int x86_popcnt;
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* -mstackrealign option */
+extern int ix86_force_align_arg_pointer;
+static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
+
+/* Preferred alignment for stack boundary in bits. */
+unsigned int ix86_preferred_stack_boundary;
+/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
+unsigned int ix86_save_preferred_stack_boundary;
+/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
+
+/* Values 1-5: see jump.c */
+int ix86_branch_cost;
+
+/* Variables which are this size or smaller are put in the data/bss
+ or ldata/lbss sections. */
+
+int ix86_section_threshold = 65536;
+
+/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
+char internal_label_prefix[16];
+int internal_label_prefix_len;
+
+static bool ix86_handle_option (size_t, const char *, int);
+static void output_pic_addr_const (FILE *, rtx, int);
+static void put_condition_code (enum rtx_code, enum machine_mode,
+ int, int, FILE *);
+static const char *get_some_local_dynamic_name (void);
+static int get_some_local_dynamic_name_1 (rtx *, void *);
+static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
+static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
+ rtx *);
+static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
+static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
+ enum machine_mode);
+static rtx get_thread_pointer (int);
+static rtx legitimize_tls_address (rtx, enum tls_model, int);
+static void get_pc_thunk_name (char [32], unsigned int);
+static rtx gen_push (rtx);
+static int ix86_flags_dependent (rtx, rtx, enum attr_type);
+static int ix86_agi_dependent (rtx, rtx, enum attr_type);
+static struct machine_function * ix86_init_machine_status (void);
+static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
+static int ix86_nsaved_regs (void);
+static void ix86_emit_save_regs (void);
+static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
+static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
+static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static HOST_WIDE_INT ix86_GOT_alias_set (void);
+static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
+static rtx ix86_expand_aligntest (rtx, int);
+static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
+static int ix86_issue_rate (void);
+static int ix86_adjust_cost (rtx, rtx, rtx, int);
+static int ia32_multipass_dfa_lookahead (void);
+static void ix86_init_mmx_sse_builtins (void);
+static rtx x86_this_parameter (tree);
+static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
+static void x86_file_start (void);
+static void ix86_reorg (void);
+static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
+static tree ix86_build_builtin_va_list (void);
+static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
+static bool ix86_scalar_mode_supported_p (enum machine_mode);
+static bool ix86_vector_mode_supported_p (enum machine_mode);
+
+static int ix86_address_cost (rtx);
+static bool ix86_cannot_force_const_mem (rtx);
+static rtx ix86_delegitimize_address (rtx);
+
+static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+
+struct builtin_description;
+static rtx ix86_expand_sse_comi (const struct builtin_description *,
+ tree, rtx);
+static rtx ix86_expand_sse_compare (const struct builtin_description *,
+ tree, rtx);
+static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
+static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
+static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx ix86_expand_store_builtin (enum insn_code, tree);
+static rtx safe_vector_operand (rtx, enum machine_mode);
+static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
+static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
+static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
+static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
+static int ix86_fp_comparison_cost (enum rtx_code code);
+static unsigned int ix86_select_alt_pic_regnum (void);
+static int ix86_save_reg (unsigned int, int);
+static void ix86_compute_frame_layout (struct ix86_frame *);
+static int ix86_comp_type_attributes (tree, tree);
+static int ix86_function_regparm (tree, tree);
+const struct attribute_spec ix86_attribute_table[];
+static bool ix86_function_ok_for_sibcall (tree, tree);
+static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
+static int ix86_value_regno (enum machine_mode, tree, tree);
+static bool contains_128bit_aligned_vector_p (tree);
+static rtx ix86_struct_value_rtx (tree, int);
+static bool ix86_ms_bitfield_layout_p (tree);
+static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
+static int extended_reg_mentioned_1 (rtx *, void *);
+static bool ix86_rtx_costs (rtx, int, int, int *);
+static int min_insn_size (rtx);
+static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
+static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
+static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static void ix86_init_builtins (void);
+static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+/* APPLE LOCAL mangle_type 7105099 */
+static const char *ix86_mangle_type (tree);
+static tree ix86_stack_protect_fail (void);
+static rtx ix86_internal_arg_pointer (void);
+static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
+
+/* This function is only used on Solaris. */
+static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
+ ATTRIBUTE_UNUSED;
+
+/* Register class used for passing a given 64bit part of the argument.
+ These represent classes as documented by the psABI, with the exception
+ of the SSESF and SSEDF classes, which are basically the SSE class: gcc
+ will just use an SFmode or DFmode move instead of DImode to avoid
+ reformatting penalties.
+
+ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (the upper half does contain padding).
+ */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+static const char * const x86_64_reg_class_name[] = {
+ "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
+ "sseup", "x87", "x87up", "cplx87", "no"
+};
+
+#define MAX_CLASSES 4
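+
+/* An illustrative classification under the psABI: passing
+
+ struct { long l; double d; };
+
+ splits into two 64bit parts, the first classified X86_64_INTEGER_CLASS
+ (a general register) and the second X86_64_SSEDF_CLASS (an SSE register,
+ moved as DFmode as described above). */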
+
+/* Table of constants used by fldpi, fldln2, etc.... */
+static REAL_VALUE_TYPE ext_80387_constants_table [5];
+static bool ext_80387_constants_init = 0;
+static void init_ext_80387_constants (void);
+static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
+static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
+static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
+static section *x86_64_elf_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+ ATTRIBUTE_UNUSED;
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+# undef TARGET_MERGE_DECL_ATTRIBUTES
+# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#endif
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS ix86_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
+
+#undef TARGET_ENCODE_SECTION_INFO
+#ifndef SUBTARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
+#else
+#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
+#endif
+
+#undef TARGET_ASM_OPEN_PAREN
+#define TARGET_ASM_OPEN_PAREN ""
+#undef TARGET_ASM_CLOSE_PAREN
+#define TARGET_ASM_CLOSE_PAREN ""
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
+#ifdef ASM_QUAD
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
+#endif
+
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ ia32_multipass_dfa_lookahead
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
+
+#if TARGET_MACHO
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
+#endif
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START x86_file_start
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT \
+ | TARGET_64BIT_DEFAULT \
+ | TARGET_SUBTARGET_DEFAULT \
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION ix86_handle_option
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS ix86_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST ix86_address_cost
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
+#undef TARGET_CC_MODES_COMPATIBLE
+#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
+#undef TARGET_INTERNAL_ARG_POINTER
+#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
+#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
+#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
+#endif
+
+#ifdef SUBTARGET_INSERT_ATTRIBUTES
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
+#endif
+
+/* APPLE LOCAL begin mangle_type 7105099 */
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE ix86_mangle_type
+/* APPLE LOCAL end mangle_type 7105099 */
+
+#undef TARGET_STACK_PROTECT_FAIL
+#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE ix86_function_value
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#ifndef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+#endif
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
+{
+ switch (code)
+ {
+ case OPT_m3dnow:
+ if (!value)
+ {
+ target_flags &= ~MASK_3DNOW_A;
+ target_flags_explicit |= MASK_3DNOW_A;
+ }
+ return true;
+
+ case OPT_mmmx:
+ if (!value)
+ {
+ target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
+ target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
+ }
+ return true;
+
+ case OPT_msse:
+ if (!value)
+ {
+ target_flags &= ~(MASK_SSE2 | MASK_SSE3);
+ target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
+ }
+ return true;
+
+ case OPT_msse2:
+ if (!value)
+ {
+ target_flags &= ~MASK_SSE3;
+ target_flags_explicit |= MASK_SSE3;
+ }
+ return true;
+
+ default:
+ return true;
+ }
+}
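+
+/* For illustration: with the handlers above, "-msse2 -mno-sse" leaves
+ SSE, SSE2 and SSE3 all disabled; -mno-sse clears MASK_SSE2 and
+ MASK_SSE3 and records them in target_flags_explicit, so a later
+ -march cannot silently re-enable them. */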
+
+/* APPLE LOCAL begin 4760857 optimization pragmas. */
+/* Hoisted so it can be used by reset_optimization_options. */
+static struct ptt
+ {
+ const struct processor_costs *cost; /* Processor costs */
+ const int target_enable; /* Target flags to enable. */
+ const int target_disable; /* Target flags to disable. */
+ const int align_loop; /* Default alignments. */
+ const int align_loop_max_skip;
+ const int align_jump;
+ const int align_jump_max_skip;
+ const int align_func;
+ }
+const processor_target_table[PROCESSOR_max] =
+ {
+ {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
+ {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
+ {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
+ {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
+ {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
+ {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
+ /* APPLE LOCAL mainline */
+ {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
+ };
+/* APPLE LOCAL end 4760857 optimization pragmas. */
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+void
+override_options (void)
+{
+ int i;
+ int ix86_tune_defaulted = 0;
+ /* APPLE LOCAL mainline */
+ int ix86_arch_specified = 0;
+
+ /* Comes from final.c -- no real reason to change it. */
+#define MAX_CODE_ALIGN 16
+
+ /* APPLE LOCAL begin 4760857 optimization pragmas. */
+ /* processor_target_table moved to file scope. */
+ /* APPLE LOCAL end 4760857 optimization pragmas. */
+
+ static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
+ static struct pta
+ {
+ const char *const name; /* processor name or nickname. */
+ const enum processor_type processor;
+ const enum pta_flags
+ {
+ PTA_SSE = 1,
+ PTA_SSE2 = 2,
+ PTA_SSE3 = 4,
+ PTA_MMX = 8,
+ PTA_PREFETCH_SSE = 16,
+ PTA_3DNOW = 32,
+ PTA_3DNOW_A = 64,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* APPLE LOCAL begin mainline */
+ PTA_64BIT = 128,
+ PTA_SSSE3 = 256,
+ /* APPLE LOCAL end mainline */
+ PTA_CX16 = 1 << 9,
+ PTA_POPCNT = 1 << 10,
+ PTA_ABM = 1 << 11,
+ PTA_SSE4A = 1 << 12,
+ PTA_NO_SAHF = 1 << 13,
+ PTA_SSE4_1 = 1 << 14,
+ PTA_SSE4_2 = 1 << 15
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ } flags;
+ }
+ const processor_alias_table[] =
+ {
+ {"i386", PROCESSOR_I386, 0},
+ {"i486", PROCESSOR_I486, 0},
+ {"i586", PROCESSOR_PENTIUM, 0},
+ {"pentium", PROCESSOR_PENTIUM, 0},
+ {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
+ {"winchip-c6", PROCESSOR_I486, PTA_MMX},
+ {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
+ {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
+ {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
+ {"i686", PROCESSOR_PENTIUMPRO, 0},
+ {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
+ {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
+ {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
+ {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
+ {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
+ {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
+ | PTA_MMX | PTA_PREFETCH_SSE},
+ /* APPLE LOCAL begin mainline */
+ {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_64BIT | PTA_MMX
+ | PTA_PREFETCH_SSE},
+ /* APPLE LOCAL end mainline */
+ {"k6", PROCESSOR_K6, PTA_MMX},
+ {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
+ {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
+ {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A},
+ {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
+ | PTA_3DNOW | PTA_3DNOW_A},
+ {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+ | PTA_3DNOW_A | PTA_SSE},
+ {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
+ | PTA_SSE | PTA_SSE2 },
+ {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
+ | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+ {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
+ {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
+ };
+
+ int const pta_size = ARRAY_SIZE (processor_alias_table);
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
+ SUBSUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ /* On Darwin, -fPIC is the default for x86_64. */
+ if (TARGET_MACHO && TARGET_64BIT)
+ flag_pic = 2;
+
+ /* Set the default values for switches whose default depends on TARGET_64BIT
+ in case they weren't overwritten by command line options. */
+ if (TARGET_64BIT)
+ {
+ /* Mach-O doesn't support omitting the frame pointer for now. */
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 1;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = 0;
+ }
+ else
+ {
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = 0;
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 0;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
+ }
+
+ /* Need to check -mtune=generic first. */
+ if (ix86_tune_string)
+ {
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "i686")
+ /* As special support for cross compilers we read -mtune=native
+ as -mtune=generic. With native compilers we won't see
+ -mtune=native, as it was already changed by the driver. */
+ || !strcmp (ix86_tune_string, "native"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ else if (!strncmp (ix86_tune_string, "generic", 7))
+ error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+ }
+ else
+ {
+ if (ix86_arch_string)
+ ix86_tune_string = ix86_arch_string;
+ if (!ix86_tune_string)
+ {
+ ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
+ ix86_tune_defaulted = 1;
+ }
+
+ /* ix86_tune_string is set to ix86_arch_string or defaulted. We
+ need to use a sensible tune option. */
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "x86-64")
+ || !strcmp (ix86_tune_string, "i686"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ }
+ if (!strcmp (ix86_tune_string, "x86-64"))
+ warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
+ "-mtune=generic instead as appropriate.");
+
+ if (!ix86_arch_string)
+ ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
+ /* APPLE LOCAL begin mainline */
+ else
+ ix86_arch_specified = 1;
+ /* APPLE LOCAL end mainline */
+ if (!strcmp (ix86_arch_string, "generic"))
+ error ("generic CPU can be used only for -mtune= switch");
+ if (!strncmp (ix86_arch_string, "generic", 7))
+ error ("bad value (%s) for -march= switch", ix86_arch_string);
+
+ if (ix86_cmodel_string != 0)
+ {
+ if (!strcmp (ix86_cmodel_string, "small"))
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ else if (!strcmp (ix86_cmodel_string, "medium"))
+ ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
+ else if (flag_pic)
+ sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
+ else if (!strcmp (ix86_cmodel_string, "32"))
+ ix86_cmodel = CM_32;
+ else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
+ ix86_cmodel = CM_KERNEL;
+ else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
+ ix86_cmodel = CM_LARGE;
+ else
+ error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
+ }
+ else
+ {
+ ix86_cmodel = CM_32;
+ if (TARGET_64BIT)
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ }
+ if (ix86_asm_string != 0)
+ {
+ if (! TARGET_MACHO
+ && !strcmp (ix86_asm_string, "intel"))
+ ix86_asm_dialect = ASM_INTEL;
+ else if (!strcmp (ix86_asm_string, "att"))
+ ix86_asm_dialect = ASM_ATT;
+ else
+ error ("bad value (%s) for -masm= switch", ix86_asm_string);
+ }
+ if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
+ error ("code model %qs not supported in the %s bit mode",
+ ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
+ if (ix86_cmodel == CM_LARGE)
+ sorry ("code model %<large%> not supported yet");
+ if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
+ sorry ("%i-bit mode not compiled in",
+ (target_flags & MASK_64BIT) ? 64 : 32);
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
+ {
+ ix86_arch = processor_alias_table[i].processor;
+ /* Default cpu tuning to the architecture. */
+ ix86_tune = ix86_arch;
+ if (processor_alias_table[i].flags & PTA_MMX
+ && !(target_flags_explicit & MASK_MMX))
+ target_flags |= MASK_MMX;
+ if (processor_alias_table[i].flags & PTA_3DNOW
+ && !(target_flags_explicit & MASK_3DNOW))
+ target_flags |= MASK_3DNOW;
+ if (processor_alias_table[i].flags & PTA_3DNOW_A
+ && !(target_flags_explicit & MASK_3DNOW_A))
+ target_flags |= MASK_3DNOW_A;
+ if (processor_alias_table[i].flags & PTA_SSE
+ && !(target_flags_explicit & MASK_SSE))
+ target_flags |= MASK_SSE;
+ if (processor_alias_table[i].flags & PTA_SSE2
+ && !(target_flags_explicit & MASK_SSE2))
+ target_flags |= MASK_SSE2;
+ if (processor_alias_table[i].flags & PTA_SSE3
+ && !(target_flags_explicit & MASK_SSE3))
+ target_flags |= MASK_SSE3;
+ /* APPLE LOCAL begin mainline */
+ if (processor_alias_table[i].flags & PTA_SSSE3
+ && !(target_flags_explicit & MASK_SSSE3))
+ target_flags |= MASK_SSSE3;
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ if (processor_alias_table[i].flags & PTA_SSE4_1
+ && !(target_flags_explicit & MASK_SSE4_1))
+ target_flags |= MASK_SSE4_1;
+ if (processor_alias_table[i].flags & PTA_SSE4_2
+ && !(target_flags_explicit & MASK_SSE4_2))
+ target_flags |= MASK_SSE4_2;
+ if (processor_alias_table[i].flags & PTA_SSE4A
+ && !(target_flags_explicit & MASK_SSE4A))
+ target_flags |= MASK_SSE4A;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
+ x86_prefetch_sse = true;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+ break;
+ }
+
+ if (i == pta_size)
+ error ("bad value (%s) for -march= switch", ix86_arch_string);
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
+ {
+ ix86_tune = processor_alias_table[i].processor;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ {
+ if (ix86_tune_defaulted)
+ {
+ ix86_tune_string = "x86-64";
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string,
+ processor_alias_table[i].name))
+ break;
+ ix86_tune = processor_alias_table[i].processor;
+ }
+ else
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+ }
+ /* Intel CPUs have always interpreted SSE prefetch instructions as
+ NOPs; so, we can enable SSE prefetch instructions even when
+ -mtune (rather than -march) points us to a processor that has them.
+ However, the VIA C3 gives a SIGILL, so we only do that for i686 and
+ higher processors. */
+ if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
+ x86_prefetch_sse = true;
+ break;
+ }
+ if (i == pta_size)
+ error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+
+ if (optimize_size)
+ ix86_cost = &size_cost;
+ else
+ ix86_cost = processor_target_table[ix86_tune].cost;
+ target_flags |= processor_target_table[ix86_tune].target_enable;
+ target_flags &= ~processor_target_table[ix86_tune].target_disable;
+
+ /* Arrange to set up i386_stack_locals for all functions. */
+ init_machine_status = ix86_init_machine_status;
+
+ /* Validate -mregparm= value. */
+ if (ix86_regparm_string)
+ {
+ i = atoi (ix86_regparm_string);
+ if (i < 0 || i > REGPARM_MAX)
+ error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
+ else
+ ix86_regparm = i;
+ }
+ else
+ if (TARGET_64BIT)
+ ix86_regparm = REGPARM_MAX;
+
+ /* If the user has provided any of the -malign-* options,
+ warn and use that value only if -falign-* is not set.
+ Remove this code in GCC 3.2 or later. */
+ if (ix86_align_loops_string)
+ {
+ warning (0, "-malign-loops is obsolete, use -falign-loops");
+ if (align_loops == 0)
+ {
+ i = atoi (ix86_align_loops_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_loops = 1 << i;
+ }
+ }
+
+ if (ix86_align_jumps_string)
+ {
+ warning (0, "-malign-jumps is obsolete, use -falign-jumps");
+ if (align_jumps == 0)
+ {
+ i = atoi (ix86_align_jumps_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_jumps = 1 << i;
+ }
+ }
+
+ if (ix86_align_funcs_string)
+ {
+ warning (0, "-malign-functions is obsolete, use -falign-functions");
+ if (align_functions == 0)
+ {
+ i = atoi (ix86_align_funcs_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_functions = 1 << i;
+ }
+ }
+
+ /* Default align_* from the processor table. */
+ if (align_loops == 0)
+ {
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ }
+ if (align_jumps == 0)
+ {
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ }
+ if (align_functions == 0)
+ {
+ align_functions = processor_target_table[ix86_tune].align_func;
+ }
+
+ /* Validate -mbranch-cost= value, or provide default. */
+ ix86_branch_cost = ix86_cost->branch_cost;
+ if (ix86_branch_cost_string)
+ {
+ i = atoi (ix86_branch_cost_string);
+ if (i < 0 || i > 5)
+ error ("-mbranch-cost=%d is not between 0 and 5", i);
+ else
+ ix86_branch_cost = i;
+ }
+ if (ix86_section_threshold_string)
+ {
+ i = atoi (ix86_section_threshold_string);
+ if (i < 0)
+ error ("-mlarge-data-threshold=%d is negative", i);
+ else
+ ix86_section_threshold = i;
+ }
+
+ if (ix86_tls_dialect_string)
+ {
+ if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU;
+ else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU2;
+ else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
+ ix86_tls_dialect = TLS_DIALECT_SUN;
+ else
+ error ("bad value (%s) for -mtls-dialect= switch",
+ ix86_tls_dialect_string);
+ }
+ /* APPLE LOCAL begin mainline */
+ if (TARGET_64BIT)
+ {
+ if (TARGET_ALIGN_DOUBLE)
+ error ("-malign-double makes no sense in 64-bit mode");
+ if (TARGET_RTD)
+ error ("-mrtd calling convention not supported in 64-bit mode");
+ /* APPLE LOCAL begin radar 4877693 */
+ if (ix86_force_align_arg_pointer)
+ error ("-mstackrealign not supported in 64-bit mode");
+ /* APPLE LOCAL end radar 4877693 */
+
+ /* Enable by default the SSE and MMX builtins. Do allow the user to
+ explicitly disable any of these. In particular, disabling SSE and
+ MMX for kernel code is extremely useful. */
+ if (!ix86_arch_specified)
+ target_flags
+ |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE
+ | TARGET_SUBTARGET64_DEFAULT) & ~target_flags_explicit);
+ /* APPLE LOCAL begin mainline candidate */
+ /* Disable the red zone for kernel compilation.
+ ??? Why aren't we using -mcmodel=kernel? */
+ if (TARGET_MACHO
+ && (flag_mkernel || flag_apple_kext))
+ target_flags |= MASK_NO_RED_ZONE;
+ /* APPLE LOCAL end mainline candidate */
+ }
+ else
+ {
+ if (!ix86_arch_specified)
+ target_flags |= (TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit);
+
+ /* The i386 ABI does not specify a red zone. It still makes sense to
+ use one when the programmer takes care to keep the stack from being
+ destroyed. */
+ if (!(target_flags_explicit & MASK_NO_RED_ZONE))
+ target_flags |= MASK_NO_RED_ZONE;
+ }
+
+ /* APPLE LOCAL end mainline */
+ /* Keep nonleaf frame pointers. */
+ if (flag_omit_frame_pointer)
+ target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+ else if (TARGET_OMIT_LEAF_FRAME_POINTER)
+ flag_omit_frame_pointer = 1;
+
+ /* If we're doing fast math, we don't care about comparison order
+ wrt NaNs. This lets us use a shorter comparison sequence. */
+ if (flag_finite_math_only)
+ target_flags &= ~MASK_IEEE_FP;
+
+ /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
+ since the insns won't need emulation. */
+ if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
+ target_flags &= ~MASK_NO_FANCY_MATH_387;
+
+ /* Likewise, if the target doesn't have a 387, or we've specified
+ software floating point, don't use 387 inline intrinsics. */
+ if (!TARGET_80387)
+ target_flags |= MASK_NO_FANCY_MATH_387;
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Turn on SSE4.1 builtins for -msse4.2. */
+ if (TARGET_SSE4_2)
+ target_flags |= MASK_SSE4_1;
+ /* Turn on SSSE3 builtins for -msse4.1. */
+ if (TARGET_SSE4_1)
+ target_flags |= MASK_SSSE3;
+ /* Turn on SSE3 builtins for -msse4a. */
+ if (TARGET_SSE4A)
+ target_flags |= MASK_SSE3;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* APPLE LOCAL begin mainline */
+ /* Turn on SSE3 builtins for -mssse3. */
+ if (TARGET_SSSE3)
+ target_flags |= MASK_SSE3;
+ /* APPLE LOCAL end mainline */
+ /* Turn on SSE2 builtins for -msse3. */
+ if (TARGET_SSE3)
+ target_flags |= MASK_SSE2;
+
+ /* Turn on SSE builtins for -msse2. */
+ if (TARGET_SSE2)
+ target_flags |= MASK_SSE;
+
+ /* Turn on MMX builtins for -msse. */
+ if (TARGET_SSE)
+ {
+ target_flags |= MASK_MMX & ~target_flags_explicit;
+ x86_prefetch_sse = true;
+ }
+
+ /* Turn on MMX builtins for 3Dnow. */
+ if (TARGET_3DNOW)
+ target_flags |= MASK_MMX;
+
+ /* APPLE LOCAL mainline */
+ /* Moved this up... */
+ /* Validate -mpreferred-stack-boundary= value, or provide default.
+ The default of 128 bits is for Pentium III's SSE __m128. We can't
+ lower it for optimize_size, since then object files compiled with
+ -Os and -On could not be mixed. */
+ ix86_preferred_stack_boundary = 128;
+ if (ix86_preferred_stack_boundary_string)
+ {
+ i = atoi (ix86_preferred_stack_boundary_string);
+ if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
+ error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
+ TARGET_64BIT ? 4 : 2);
+ else
+ ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ }
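+
+/* E.g. -mpreferred-stack-boundary=4 requests an alignment of 1 << 4
+ == 16 bytes, recorded here as 128 bits. */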
+
+ /* Accept -msseregparm only if at least SSE support is enabled. */
+ if (TARGET_SSEREGPARM
+ && ! TARGET_SSE)
+ error ("-msseregparm used without SSE enabled");
+
+ ix86_fpmath = TARGET_FPMATH_DEFAULT;
+
+ if (ix86_fpmath_string != 0)
+ {
+ if (! strcmp (ix86_fpmath_string, "387"))
+ ix86_fpmath = FPMATH_387;
+ else if (! strcmp (ix86_fpmath_string, "sse"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else if (! strcmp (ix86_fpmath_string, "387,sse")
+ || ! strcmp (ix86_fpmath_string, "sse,387"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else if (!TARGET_80387)
+ {
+ warning (0, "387 instruction set disabled, using SSE arithmetics");
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE | FPMATH_387;
+ }
+ else
+ error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
+ }
+
+ /* If the i387 is disabled, then do not return values in it. */
+ if (!TARGET_80387)
+ target_flags &= ~MASK_FLOAT_RETURNS;
+
+ if ((x86_accumulate_outgoing_args & TUNEMASK)
+ && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ && !optimize_size)
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+
+ /* ??? Unwind info is not correct around the CFG unless either a frame
+ pointer is present or M_A_O_A is set. Fixing this requires rewriting
+ unwind info generation to be aware of the CFG and propagating states
+ around edges. */
+ if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+ || flag_exceptions || flag_non_call_exceptions)
+ && flag_omit_frame_pointer
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "unwind tables currently require either a frame pointer "
+ "or -maccumulate-outgoing-args for correctness");
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
+ /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
+ {
+ char *p;
+ ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
+ p = strchr (internal_label_prefix, 'X');
+ internal_label_prefix_len = p - internal_label_prefix;
+ *p = '\0';
+ }
+
+ /* When no scheduling description is available, disable the scheduler
+ pass so it won't slow down compilation or make x87 code slower. */
+ /* APPLE LOCAL 5591571 */
+ if (1 || !TARGET_SCHEDULE)
+ flag_schedule_insns_after_reload = flag_schedule_insns = 0;
+
+ /* APPLE LOCAL begin dynamic-no-pic */
+#if TARGET_MACHO
+ if (MACHO_DYNAMIC_NO_PIC_P)
+ {
+ if (flag_pic)
+ warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC");
+ flag_pic = 0;
+ }
+ else
+#endif
+ if (flag_pic == 1)
+ {
+ /* Darwin's -fpic is -fPIC. */
+ flag_pic = 2;
+ }
+ /* APPLE LOCAL end dynamic-no-pic */
+ /* APPLE LOCAL begin 4812082 -fast */
+ /* These flags were the best on the software H264 codec, and have therefore
+ been lumped into -fast per 4812082. They have not been evaluated on
+ any other code, except that -fno-tree-pre is known to lose on the
+ hardware accelerated version of the codec. */
+ if (flag_fast || flag_fastf || flag_fastcp)
+ {
+ flag_omit_frame_pointer = 1;
+ flag_strict_aliasing = 1;
+ flag_tree_pre = 0;
+ target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ }
+ /* APPLE LOCAL end 4812082 -fast */
+}
+
+/* Switch to the appropriate section for output of DECL.
+ DECL is either a `VAR_DECL' node or a constant of some sort.
+ RELOC indicates whether forming the initial value of DECL requires
+ link-time relocations. */
+
+static section *
+x86_64_elf_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *sname = NULL;
+ unsigned int flags = SECTION_WRITE;
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ sname = ".ldata";
+ break;
+ case SECCAT_DATA_REL:
+ sname = ".ldata.rel";
+ break;
+ case SECCAT_DATA_REL_LOCAL:
+ sname = ".ldata.rel.local";
+ break;
+ case SECCAT_DATA_REL_RO:
+ sname = ".ldata.rel.ro";
+ break;
+ case SECCAT_DATA_REL_RO_LOCAL:
+ sname = ".ldata.rel.ro.local";
+ break;
+ case SECCAT_BSS:
+ sname = ".lbss";
+ flags |= SECTION_BSS;
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ sname = ".lrodata";
+ flags = 0;
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+ /* We don't split these for the medium model. Place them into
+ default sections and hope for the best. */
+ break;
+ }
+ if (sname)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section (sname, flags, NULL);
+ return get_named_section (decl, sname, reloc);
+ }
+ }
+ return default_elf_select_section (decl, reloc, align);
+}
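+
+/* For illustration: under -mcmodel=medium, a writable initialized array
+ larger than -mlarge-data-threshold is placed in .ldata (or one of the
+ .ldata.rel* variants, depending on its relocations) rather than .data,
+ so it may live outside the lower 2GB of the address space. */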
+
+/* Build up a unique section name, expressed as a
+ STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
+ RELOC indicates whether the initial value of EXP requires
+ link-time relocations. */
+
+static void
+x86_64_elf_unique_section (tree decl, int reloc)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *prefix = NULL;
+ /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
+ bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
+
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ case SECCAT_DATA_REL:
+ case SECCAT_DATA_REL_LOCAL:
+ case SECCAT_DATA_REL_RO:
+ case SECCAT_DATA_REL_RO_LOCAL:
+ prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
+ break;
+ case SECCAT_BSS:
+ prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+ /* We don't split these for the medium model. Place them into
+ default sections and hope for the best. */
+ break;
+ }
+ if (prefix)
+ {
+ const char *name;
+ size_t nlen, plen;
+ char *string;
+ plen = strlen (prefix);
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = targetm.strip_name_encoding (name);
+ nlen = strlen (name);
+
+ string = alloca (nlen + plen + 1);
+ memcpy (string, prefix, plen);
+ memcpy (string + plen, name, nlen + 1);
+
+ DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
+ return;
+ }
+ }
+ default_unique_section (decl, reloc);
+}
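+
+/* E.g. a large one-only variable "foo" on a target without COMDAT
+ groups is given the section name ".gnu.linkonce.ld.foo"; with COMDAT
+ groups it gets ".ldata.foo" instead. */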
+
+#ifdef COMMON_ASM_OP
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object.
+
+ For medium model x86-64 we need to use the .largecomm directive for
+ large objects. */
+void
+x86_elf_aligned_common (FILE *file,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ fprintf (file, ".largecomm\t");
+ else
+ fprintf (file, "%s", COMMON_ASM_OP);
+ assemble_name (file, name);
+ fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
+ size, align / BITS_PER_UNIT);
+}
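+
+/* As a sketch: a 100000-byte common symbol "buf" above the large-data
+ threshold, with 32-byte alignment, would come out as
+ .largecomm buf,100000,32
+ while smaller symbols use the plain COMMON_ASM_OP form. */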
+
+/* Utility function for targets to use in implementing
+ ASM_OUTPUT_ALIGNED_BSS. */
+
+void
+x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ switch_to_section (get_named_section (decl, ".lbss", 0));
+ else
+ switch_to_section (bss_section);
+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+#ifdef ASM_DECLARE_OBJECT_NAME
+ last_assemble_variable_decl = decl;
+ ASM_DECLARE_OBJECT_NAME (file, name, decl);
+#else
+ /* The standard thing is to just output a label for the object. */
+ ASM_OUTPUT_LABEL (file, name);
+#endif /* ASM_DECLARE_OBJECT_NAME */
+ ASM_OUTPUT_SKIP (file, size ? size : 1);
+}
+#endif
+
+void
+optimization_options (int level, int size ATTRIBUTE_UNUSED)
+{
+ /* APPLE LOCAL begin disable strict aliasing; breaks too much existing code. */
+#if TARGET_MACHO
+ flag_strict_aliasing = 0;
+#endif
+ /* APPLE LOCAL end disable strict aliasing; breaks too much existing code. */
+ /* For -O2 and beyond, turn off -fschedule-insns by default. It tends
+ to make the shortage of registers even worse. */
+#ifdef INSN_SCHEDULING
+ if (level > 1)
+ flag_schedule_insns = 0;
+#endif
+
+ /* APPLE LOCAL begin pragma fenv */
+ /* Trapping math is not needed by many users, and is expensive.
+ C99 permits us to default it off and we do that. It is
+ turned on when <fenv.h> is included (see darwin_pragma_fenv
+ in darwin-c.c). */
+ flag_trapping_math = 0;
+ /* APPLE LOCAL end pragma fenv */
+
+ if (TARGET_MACHO)
+ /* The Darwin libraries never set errno, so we might as well
+ avoid calling them when that's the only reason we would. */
+ flag_errno_math = 0;
+
+ /* The default values of these switches depend on TARGET_64BIT,
+ which is not known at this moment. Mark these values with 2 and
+ let the user override them. If no command line option specifies
+ them, we will set the defaults in override_options. */
+ if (optimize >= 1)
+ flag_omit_frame_pointer = 2;
+ flag_pcc_struct_return = 2;
+ flag_asynchronous_unwind_tables = 2;
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+ /* APPLE LOCAL begin 4200243 */
+ if (getenv ("RC_FORCE_SSE3"))
+ target_flags |= MASK_SSE3;
+}
+/* APPLE LOCAL end 4200243 */
+
+/* APPLE LOCAL begin optimization pragmas 3124235/3420242 */
+/* Version of the above for use from #pragma optimization_level. Only
+ per-function flags are reset. */
+#if TARGET_MACHO
+void
+reset_optimization_options (int level, int size)
+{
+ /* For -O2 and beyond, turn off -fschedule-insns by default. It tends
+ to make the shortage of registers even worse. */
+#ifdef INSN_SCHEDULING
+ if (level > 1)
+ flag_schedule_insns = 0;
+#endif
+
+ /* APPLE LOCAL begin pragma fenv */
+ /* Trapping math is not needed by many users, and is expensive.
+ C99 permits us to default it off and we do that. It is
+ turned on when <fenv.h> is included (see darwin_pragma_fenv
+ in darwin-c.c). */
+ flag_trapping_math = 0;
+ /* APPLE LOCAL end pragma fenv */
+
+ /* The default values of these switches depend on TARGET_64BIT
+ which was set earlier and not reset. */
+ if (optimize >= 1)
+ {
+ if (TARGET_64BIT)
+ flag_omit_frame_pointer = 1;
+ else
+ flag_omit_frame_pointer = 0;
+ }
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+ /* APPLE LOCAL begin 4760857 */
+ if (size)
+ ix86_cost = &size_cost;
+ else
+ ix86_cost = processor_target_table[ix86_tune].cost;
+
+ /* Default align_* from the processor table. */
+ if (align_loops == 0)
+ {
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ }
+ if (align_jumps == 0)
+ {
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ }
+ /* APPLE LOCAL end 4760857 */
+}
+#endif
+/* APPLE LOCAL end optimization pragmas 3124235/3420242 */
+
+/* Table of valid machine attributes. */
+const struct attribute_spec ix86_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Stdcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Fastcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Cdecl attribute says the callee is a normal C declaration */
+ { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Regparm attribute specifies how many integer arguments are to be
+ passed in registers. */
+ { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
+ /* APPLE LOCAL begin regparmandstackparm */
+ /* regparmandstackparm means two entry points; a traditional stack-based
+ one, and another, with a mangled name, that employs regparm and
+ sseregparm. */
+ { "regparmandstackparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ { "regparmandstackparmee", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* APPLE LOCAL end regparmandstackparm */
+ /* Sseregparm attribute says we are using x86_64 calling conventions
+ for FP arguments. */
+ { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* force_align_arg_pointer says this function realigns the stack at entry. */
+ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
+ false, true, true, ix86_handle_cconv_attribute },
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
+#endif
+ { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+ { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+ SUBTARGET_ATTRIBUTE_TABLE,
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
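+
+/* Typical uses of the entries above (32-bit only):
+ int __attribute__((regparm(3))) f (int a, int b, int c);
+ void __attribute__((fastcall)) g (int a, int b);
+ pass a/b/c in %eax/%edx/%ecx and a/b in %ecx/%edx respectively. */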
+
+/* Decide whether we can make a sibling call to a function. DECL is the
+ declaration of the function being targeted by the call and EXP is the
+ CALL_EXPR representing the call. */
+
+static bool
+ix86_function_ok_for_sibcall (tree decl, tree exp)
+{
+ tree func;
+ rtx a, b;
+
+ /* APPLE LOCAL begin indirect sibcall 4087330 */
+ /* If we are generating position-independent code, we cannot sibcall
+ optimize any indirect call, or a direct call to a global function,
+ as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
+ if (!TARGET_MACHO
+ && !TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
+ return false;
+ /* APPLE LOCAL end indirect sibcall 4087330 */
+
+ if (decl)
+ func = decl;
+ else
+ {
+ func = TREE_TYPE (TREE_OPERAND (exp, 0));
+ if (POINTER_TYPE_P (func))
+ func = TREE_TYPE (func);
+ }
+
+ /* Check that the return value locations are the same. For example,
+ if we are returning floats on the 80387 register stack, we cannot
+ make a sibcall from a function that doesn't return a float to a
+ function that does or, conversely, from a function that does return
+ a float to a function that doesn't; the necessary stack adjustment
+ would not be executed. This is also the place we notice
+ differences in the return value ABI. Note that it is ok for one
+ of the functions to have void return type as long as the return
+ value of the other is passed in a register. */
+ a = ix86_function_value (TREE_TYPE (exp), func, false);
+ b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (STACK_REG_P (a) || STACK_REG_P (b))
+ {
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+ else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ ;
+ else if (!rtx_equal_p (a, b))
+ return false;
+
+ /* If this call is indirect, we'll need to be able to use a call-clobbered
+ register for the address of the target function. Make sure that all
+ such registers are not used for passing parameters. */
+ if (!decl && !TARGET_64BIT)
+ {
+ tree type;
+
+ /* We're looking at the CALL_EXPR, we need the type of the function. */
+ type = TREE_OPERAND (exp, 0); /* pointer expression */
+ type = TREE_TYPE (type); /* pointer type */
+ type = TREE_TYPE (type); /* function type */
+
+ if (ix86_function_regparm (type, NULL) >= 3)
+ {
+ /* ??? Need to count the actual number of registers to be used,
+ not the possible number of registers. Fix later. */
+ return false;
+ }
+ }
+
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ /* Dllimport'd functions are also called indirectly. */
+ if (decl && DECL_DLLIMPORT_P (decl)
+ && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
+ return false;
+#endif
+
+ /* If we force-aligned the stack, then sibcalling would unalign it,
+ which may break the called function. */
+ if (cfun->machine->force_align_arg_pointer)
+ return false;
+
+ /* Otherwise okay. That also includes certain types of indirect calls. */
+ return true;
+}
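+
+/* Example of the return-value check above: in 32-bit code a function
+ returning float in %st(0) must not sibcall one returning int, and vice
+ versa, because the x87 stack adjustment would never execute. */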
+
+/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
+ calling convention attributes;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+ix86_handle_cconv_attribute (tree *node, tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qs attribute only applies to functions",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* Can combine regparm with all attributes but fastcall. */
+ if (is_attribute_p ("regparm", name))
+ {
+ tree cst;
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (!TARGET_64BIT
+ && (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node))
+ || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (*node))))
+ {
+ error ("regparmandstackparm and regparm attributes are not compatible");
+ }
+ /* APPLE LOCAL end regparmandstackparm */
+
+ cst = TREE_VALUE (args);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ {
+ warning (OPT_Wattributes,
+ "%qs attribute requires an integer constant argument",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+ else if (compare_tree_int (cst, REGPARM_MAX) > 0)
+ {
+ warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
+ IDENTIFIER_POINTER (name), REGPARM_MAX);
+ *no_add_attrs = true;
+ }
+
+ if (!TARGET_64BIT
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (*node))
+ && compare_tree_int (cst, REGPARM_MAX-1))
+ {
+ error ("%s functions limited to %d register parameters",
+ ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
+ }
+
+ return NULL_TREE;
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Turn on popcnt instruction for -msse4.2 or -mabm. */
+ if (TARGET_SSE4_2)
+ x86_popcnt = true;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ if (TARGET_64BIT)
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* Fastcall can be combined only with sseregparm; it conflicts with
+ cdecl, stdcall and regparm. */
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (is_attribute_p ("fastcall", name)
+ || is_attribute_p ("regparmandstackparm", name))
+ /* APPLE LOCAL end regparmandstackparm */
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and stdcall attributes are not compatible");
+ }
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+ }
+
+ /* Stdcall can be combined with regparm and sseregparm; it conflicts
+ with cdecl and fastcall. */
+ else if (is_attribute_p ("stdcall", name))
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))
+ || lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node)))
+ /* APPLE LOCAL end regparmandstackparm */
+ {
+ error ("stdcall and fastcall attributes are not compatible");
+ }
+ }
+
+ /* Can combine cdecl with regparm and sseregparm. */
+ else if (is_attribute_p ("cdecl", name))
+ {
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ }
+
+ /* Can combine sseregparm with all attributes. */
+
+ return NULL_TREE;
+}
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+
+static int
+ix86_comp_type_attributes (tree type1, tree type2)
+{
+ /* Check for mismatch of non-default calling convention. */
+ const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
+
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched fastcall/regparm types. */
+ if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
+ || (ix86_function_regparm (type1, NULL)
+ != ix86_function_regparm (type2, NULL)))
+ return 0;
+
+ /* Check for mismatched sseregparm types. */
+ if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ /* Check for mismatched return types (cdecl vs stdcall). */
+ if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ return 1;
+}
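+
+/* E.g. a pointer to a stdcall function and a pointer to a plain (cdecl)
+ function compare as incompatible: the rtdstr lookup above differs, so
+ we return 0 and the front end can diagnose the mismatch. */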
+
+/* Return the regparm value for a function with the indicated TYPE and DECL.
+ DECL may be NULL when calling function indirectly
+ or considering a libcall. */
+
+static int
+ix86_function_regparm (tree type, tree decl)
+{
+ tree attr;
+ /* APPLE LOCAL begin MERGE FIXME audit to ensure that it's ok
+
+ We had local_regparm but the FSF didn't, and there didn't seem to
+ be a merge conflict, so something is strange. These seem to be just
+ normal Apple local changes. I asked Stuart about them in email. */
+ int regparm = ix86_regparm;
+ bool user_convention = false;
+
+ if (!TARGET_64BIT)
+ {
+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+ user_convention = true;
+ }
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))
+ || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
+ /* APPLE LOCAL end regparmandstackparm */
+ {
+ regparm = 2;
+ user_convention = true;
+ }
+
+ /* Use register calling convention for local functions when possible. */
+ if (!TARGET_64BIT && !user_convention && decl
+ && flag_unit_at_a_time && !profile_flag)
+ {
+ struct cgraph_local_info *i = cgraph_local_info (decl);
+ if (i && i->local)
+ {
+ int local_regparm, globals = 0, regno;
+
+ /* Make sure no regparm register is taken by a global register
+ variable. */
+ for (local_regparm = 0; local_regparm < 3; local_regparm++)
+ if (global_regs[local_regparm])
+ break;
+ /* We can't use regparm(3) for nested functions as these use the
+ static chain pointer in the third argument. */
+ if (local_regparm == 3
+ /* APPLE LOCAL begin mainline */
+ && (decl_function_context (decl)
+ || ix86_force_align_arg_pointer)
+ /* APPLE LOCAL end mainline */
+ && !DECL_NO_STATIC_CHAIN (decl))
+ local_regparm = 2;
+ /* If the function realigns its stack pointer, the
+ prologue will clobber %ecx. If we've already
+ generated code for the callee, the callee
+ DECL_STRUCT_FUNCTION is gone, so we fall back to
+ scanning the attributes for the self-realigning
+ property. */
+ if ((DECL_STRUCT_FUNCTION (decl)
+ /* MERGE FIXME was in our version, but not in FSF 2006-05-23 */
+ && DECL_STRUCT_FUNCTION (decl)->machine
+ && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
+ || (!DECL_STRUCT_FUNCTION (decl)
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
+ local_regparm = 2;
+ /* Each global register variable increases register pressure,
+ so the more global register variables there are, the less useful
+ the regparm optimization becomes, unless the user requests it
+ explicitly. */
+ for (regno = 0; regno < 6; regno++)
+ if (global_regs[regno])
+ globals++;
+ local_regparm
+ = globals < local_regparm ? local_regparm - globals : 0;
+
+ if (local_regparm > regparm)
+ regparm = local_regparm;
+ }
+ }
+ }
+ /* APPLE LOCAL end MERGE FIXME audit to ensure that it's ok */
+ return regparm;
+}
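+
+/* Sketch of the local-function path above: a static function whose
+ address never escapes (cgraph marks it i->local), compiled with
+ -funit-at-a-time and no global register variables, is promoted to
+ regparm 3, or 2 if it needs the static chain or realigns its stack. */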
+
+/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
+ DFmode (2) arguments in SSE registers for a function with the
+ indicated TYPE and DECL. DECL may be NULL when calling function
+ indirectly or considering a libcall. Otherwise return 0. */
+
+static int
+ix86_function_sseregparm (tree type, tree decl)
+{
+ /* Use SSE registers to pass SFmode and DFmode arguments if requested
+ by the sseregparm attribute. */
+ if (TARGET_SSEREGPARM
+ || (type
+ && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
+ {
+ if (!TARGET_SSE)
+ {
+ if (decl)
+ error ("Calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("Calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ return 0;
+ }
+
+ return 2;
+ }
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (type && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
+ return 2;
+ /* APPLE LOCAL end regparmandstackparm */
+
+ /* For local functions, pass up to SSE_REGPARM_MAX SFmode
+ (and DFmode for SSE2) arguments in SSE registers,
+ even for 32-bit targets. */
+ if (!TARGET_64BIT && decl
+ && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
+ {
+ struct cgraph_local_info *i = cgraph_local_info (decl);
+ if (i && i->local)
+ return TARGET_SSE2 ? 2 : 1;
+ }
+
+ return 0;
+}
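+
+/* E.g. "double __attribute__((sseregparm)) f (double x);" receives x in
+ %xmm0 in 32-bit code when SSE is enabled; without the attribute a
+ 32-bit double argument is passed on the stack. */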
+
+/* Return true if EAX is live at the start of the function. Used by
+ ix86_expand_prologue to determine if we need special help before
+ calling allocate_stack_worker. */
+
+static bool
+ix86_eax_live_at_start_p (void)
+{
+ /* Cheat. Don't bother working forward from ix86_function_regparm
+ to the function type to whether an actual argument is located in
+ eax. Instead just look at cfg info, which is still close enough
+ to correct at this point. This gives false positives for broken
+ functions that might use uninitialized data that happens to be
+ allocated in eax, but who cares? */
+ return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
+}
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the 80386, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per module basis. */
+
+int
+ix86_return_pops_args (tree fundecl, tree funtype, int size)
+{
+ int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
+
+ /* Cdecl functions override -mrtd, and never pop the stack. */
+ if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
+
+ /* Stdcall and fastcall functions will pop the stack if they do not
+ take variable arguments. */
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
+ rtd = 1;
+
+ if (rtd
+ && (TYPE_ARG_TYPES (funtype) == NULL_TREE
+ || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
+ == void_type_node)))
+ return size;
+ }
+
+ /* Lose any fake structure return argument if it is passed on the stack. */
+ if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
+ && !TARGET_64BIT
+ && !KEEP_AGGREGATE_RETURN_POINTER)
+ {
+ int nregs = ix86_function_regparm (funtype, fundecl);
+
+ if (!nregs)
+ return GET_MODE_SIZE (Pmode);
+ }
+
+ return 0;
+}
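+
+/* Concretely: a stdcall function taking two ints returns with "ret $8"
+ (the callee pops SIZE == 8 bytes), while a varargs stdcall function
+ gets 0 here and leaves the popping to the caller. */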
+
+/* Argument support functions. */
+
+/* Return true when register REGNO may be used to pass function parameters. */
+bool
+ix86_function_arg_regno_p (int regno)
+{
+ int i;
+ if (!TARGET_64BIT)
+ {
+ if (TARGET_MACHO)
+ return (regno < REGPARM_MAX
+ || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
+ else
+ return (regno < REGPARM_MAX
+ || (TARGET_MMX && MMX_REGNO_P (regno)
+ && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
+ || (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
+ }
+
+ if (TARGET_MACHO)
+ {
+ if (SSE_REGNO_P (regno) && TARGET_SSE)
+ return true;
+ }
+ else
+ {
+ if (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
+ return true;
+ }
+ /* RAX is used as a hidden argument to varargs functions. */
+ if (!regno)
+ return true;
+ for (i = 0; i < REGPARM_MAX; i++)
+ if (regno == x86_64_int_parameter_registers[i])
+ return true;
+ return false;
+}
+
+/* Return true if we do not know how to pass TYPE solely in registers. */
+
+static bool
+ix86_must_pass_in_stack (enum machine_mode mode, tree type)
+{
+ if (must_pass_in_stack_var_size_or_pad (mode, type))
+ return true;
+
+ /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
+ The layout_type routine is crafty and tries to trick us into passing
+ currently unsupported vector types on the stack by using TImode. */
+ return (!TARGET_64BIT && mode == TImode
+ && type && TREE_CODE (type) != VECTOR_TYPE);
+}
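+
+/* E.g. a 16-byte non-vector aggregate that layout_type gave TImode is
+ forced onto the stack in 32-bit mode by the check above. */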
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
+ tree fntype, /* tree ptr for function decl */
+ rtx libname, /* SYMBOL_REF of library name or 0 */
+ tree fndecl)
+{
+ static CUMULATIVE_ARGS zero_cum;
+ tree param, next_param;
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "\ninit_cumulative_args (");
+ if (fntype)
+ fprintf (stderr, "fntype code = %s, ret code = %s",
+ tree_code_name[(int) TREE_CODE (fntype)],
+ tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
+ else
+ fprintf (stderr, "no fntype");
+
+ if (libname)
+ fprintf (stderr, ", libname = %s", XSTR (libname, 0));
+ }
+
+ *cum = zero_cum;
+
+ /* Set up the number of registers to use for passing arguments. */
+ cum->nregs = ix86_regparm;
+ if (TARGET_SSE)
+ cum->sse_nregs = SSE_REGPARM_MAX;
+ if (TARGET_MMX)
+ cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->warn_sse = true;
+ cum->warn_mmx = true;
+ cum->maybe_vaarg = false;
+
+ /* Use ecx and edx registers if function has fastcall attribute,
+ else look for regparm information. */
+ if (fntype && !TARGET_64BIT)
+ {
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 2;
+ cum->fastcall = 1;
+ }
+ else
+ cum->nregs = ix86_function_regparm (fntype, fndecl);
+ }
+
+ /* Set up the number of SSE registers used for passing SFmode
+ and DFmode arguments. Warn for mismatching ABI. */
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
+
+ /* Determine if this function has variable arguments. This is
+ indicated by the last argument being 'void_type_node' if there
+ are no variable arguments. If there are variable arguments, then
+ we won't pass anything in registers in 32-bit mode. */
+
+ if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
+ {
+ for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
+ param != 0; param = next_param)
+ {
+ next_param = TREE_CHAIN (param);
+ if (next_param == 0 && TREE_VALUE (param) != void_type_node)
+ {
+ if (!TARGET_64BIT)
+ {
+ cum->nregs = 0;
+ cum->sse_nregs = 0;
+ cum->mmx_nregs = 0;
+ cum->warn_sse = 0;
+ cum->warn_mmx = 0;
+ cum->fastcall = 0;
+ cum->float_in_sse = 0;
+ }
+ cum->maybe_vaarg = true;
+ }
+ }
+ }
+ if ((!fntype && !libname)
+ || (fntype && !TYPE_ARG_TYPES (fntype)))
+ cum->maybe_vaarg = true;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, ", nregs=%d )\n", cum->nregs);
+
+ return;
+}
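+
+/* E.g. for "void f (int, ...);" in 32-bit mode the loop above sees that
+ the argument list does not end in void_type_node, zeroes the register
+ counts and sets maybe_vaarg, so every argument goes on the stack. */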
+
+/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
+ But in the case of vector types, it is some vector mode.
+
+ When we have only some of our vector isa extensions enabled, then there
+ are some modes for which vector_mode_supported_p is false. For these
+ modes, the generic vector support in gcc will choose some non-vector mode
+ in order to implement the type. By computing the natural mode, we'll
+ select the proper ABI location for the operand and not depend on whatever
+ the middle-end decides to do with these vector types. */
+
+static enum machine_mode
+type_natural_mode (tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+
+ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if ((size == 8 || size == 16)
+ /* ??? Generic code allows us to create width 1 vectors. Ignore. */
+ && TYPE_VECTOR_SUBPARTS (type) > 1)
+ {
+ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
+
+ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+ mode = MIN_MODE_VECTOR_FLOAT;
+ else
+ mode = MIN_MODE_VECTOR_INT;
+
+ /* Get the mode which has this inner mode and number of units. */
+ for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
+ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
+ && GET_MODE_INNER (mode) == innermode)
+ return mode;
+
+ gcc_unreachable ();
+ }
+ }
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ /* Pass V1DImode objects as DImode. This is for compatibility. */
+ if (TREE_CODE (type) == VECTOR_TYPE && mode == V1DImode && !TARGET_64BIT)
+ return DImode;
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ return mode;
+}
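+
+/* Sketch: for "float v __attribute__((vector_size (16)));" compiled
+ without -msse, TYPE_MODE is not a vector mode, so the loop above walks
+ the float vector modes and returns V4SFmode; the ABI location is thus
+ chosen as if SSE were available. */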
+
+/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
+ this may not agree with the mode that the type system has chosen for the
+ register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
+ go ahead and use it. Otherwise we have to build a PARALLEL instead. */
+
+static rtx
+gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
+ unsigned int regno)
+{
+ rtx tmp;
+
+ if (orig_mode != BLKmode)
+ tmp = gen_rtx_REG (orig_mode, regno);
+ else
+ {
+ tmp = gen_rtx_REG (mode, regno);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
+ }
+
+ return tmp;
+}
+
+/* x86-64 register passing implementation. See the x86-64 PS ABI for details.
+ The goal of this code is to classify each eightbyte of an incoming argument
+ by register class and assign registers accordingly. */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
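+
+/* A worked example (for illustration only): in
+ struct s { int i; float f; };
+ both fields share one eightbyte. The int contributes
+ X86_64_INTEGERSI_CLASS and the float at bit offset 32 contributes
+ X86_64_SSE_CLASS; rule #4 merges them to X86_64_INTEGER_CLASS, so the
+ struct travels in a single general-purpose register. */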
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for
+ zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ BIT_OFFSET is used internally for handling records; it specifies the
+ offset in bits, modulo 256, to avoid overflow cases.
+
+ See the x86-64 PS ABI for details.
+*/
+
+static int
+classify_argument (enum machine_mode mode, tree type,
+ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
+{
+ HOST_WIDE_INT bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* Variable sized entities are always passed/returned in memory. */
+ if (bytes < 0)
+ return 0;
+
+ if (mode != VOIDmode
+ && targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ if (type && AGGREGATE_TYPE_P (type))
+ {
+ int i;
+ tree field;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* On x86-64 we pass structures larger than 16 bytes on the stack. */
+ if (bytes > 16)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+ /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
+ signal the memory class, so handle this as a special case. */
+ if (!words)
+ {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Classify each field of record and merge classes. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ /* For C++ classes, first merge in the fields of the base classes. */
+ if (TYPE_BINFO (type))
+ {
+ tree binfo, base_binfo;
+ int basenum;
+
+ for (binfo = TYPE_BINFO (type), basenum = 0;
+ BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
+ {
+ int num;
+ int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
+ tree type = BINFO_TYPE (base_binfo);
+
+ num = classify_argument (TYPE_MODE (type),
+ type, subclasses,
+ (offset + bit_offset) % 256);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos = (offset + (bit_offset % 64)) / 8 / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+ }
+ }
+ /* And now merge the fields of the structure. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ /* Bitfields are always classified as integer. Handle them
+ early, since later code would consider them to be
+ misaligned integers. */
+ if (DECL_BIT_FIELD (field))
+ {
+ for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ i < ((int_bit_position (field) + (bit_offset % 64))
+ + tree_low_cst (DECL_SIZE (field), 0)
+ + 63) / 8 / 8; i++)
+ classes[i] =
+ merge_classes (X86_64_INTEGER_CLASS,
+ classes[i]);
+ }
+ else
+ {
+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
+ TREE_TYPE (field), subclasses,
+ (int_bit_position (field)
+ + bit_offset) % 256);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos =
+ (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+ }
+ }
+ }
+ break;
+
+ case ARRAY_TYPE:
+ /* Arrays are handled as small records. */
+ {
+ int num;
+ num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
+ TREE_TYPE (type), subclasses, bit_offset);
+ if (!num)
+ return 0;
+
+ /* The partial classes are now full classes. */
+ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
+ subclasses[0] = X86_64_SSE_CLASS;
+ if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
+ subclasses[0] = X86_64_INTEGER_CLASS;
+
+ for (i = 0; i < words; i++)
+ classes[i] = subclasses[i % num];
+
+ break;
+ }
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ /* Unions are similar to RECORD_TYPE but the offset is always 0. */
+
+ /* Unions are not derived. */
+ gcc_assert (!TYPE_BINFO (type)
+ || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
+ TREE_TYPE (field), subclasses,
+ bit_offset);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ classes[i] = merge_classes (subclasses[i], classes[i]);
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* The X86_64_SSEUP_CLASS should always be preceded by
+ X86_64_SSE_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
+ classes[i] = X86_64_SSE_CLASS;
+
+ /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
+ classes[i] = X86_64_SSE_CLASS;
+ }
+ return words;
+ }
+
+ /* Compute the alignment needed. We align all types to their natural
+ boundaries, with the exception of XFmode, which is aligned to 64 bits. */
+ if (mode != VOIDmode && mode != BLKmode)
+ {
+ int mode_alignment = GET_MODE_BITSIZE (mode);
+
+ if (mode == XFmode)
+ mode_alignment = 128;
+ else if (mode == XCmode)
+ mode_alignment = 256;
+ if (COMPLEX_MODE_P (mode))
+ mode_alignment /= 2;
+ /* Misaligned fields are always returned in memory. */
+ if (bit_offset % mode_alignment)
+ return 0;
+ }
+
+ /* For V1xx modes, just use the base mode. */
+ if (VECTOR_MODE_P (mode)
+ && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
+ mode = GET_MODE_INNER (mode);
+
+ /* Classification of atomic types. */
+ switch (mode)
+ {
+ case SDmode:
+ case DDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case TDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ case CSImode:
+ case CHImode:
+ case CQImode:
+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ else
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ case CDImode:
+ case TImode:
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ case CTImode:
+ return 0;
+ case SFmode:
+ if (!(bit_offset % 64))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case DFmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+ case XFmode:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case TFmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case SCmode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case DCmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+ case XCmode:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+ case TCmode:
+ /* This mode is larger than 16 bytes. */
+ return 0;
+ case V4SFmode:
+ case V4SImode:
+ case V16QImode:
+ case V8HImode:
+ case V2DFmode:
+ case V2DImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case V2SFmode:
+ case V2SImode:
+ case V4HImode:
+ case V8QImode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case BLKmode:
+ case VOIDmode:
+ return 0;
+ default:
+ gcc_assert (VECTOR_MODE_P (mode));
+
+ if (bytes > 16)
+ return 0;
+
+ gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
+
+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ else
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGER_CLASS;
+ return 1 + (bytes > 8);
+ }
+}
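+
+/* A worked example (for illustration only): for
+ struct s { double d; int i; };
+ the first eightbyte is classified X86_64_SSEDF_CLASS and the second
+ X86_64_INTEGER_CLASS, so classify_argument returns 2 and the struct
+ is passed in one SSE register and one integer register. */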
+
+/* Examine the argument and return the number of registers required in each
+ class. Return 0 iff the parameter should be passed in memory. */
+static int
+examine_argument (enum machine_mode mode, tree type, int in_return,
+ int *int_nregs, int *sse_nregs)
+{
+ enum x86_64_reg_class class[MAX_CLASSES];
+ int n = classify_argument (mode, type, class, 0);
+
+ *int_nregs = 0;
+ *sse_nregs = 0;
+ if (!n)
+ return 0;
+ for (n--; n >= 0; n--)
+ switch (class[n])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ (*int_nregs)++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ (*sse_nregs)++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ if (!in_return)
+ return 0;
+ break;
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return ? 2 : 0;
+ case X86_64_MEMORY_CLASS:
+ gcc_unreachable ();
+ }
+ return 1;
+}
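+
+/* Continuing the example above (illustrative): for
+ struct s { double d; int i; }, examine_argument sets *int_nregs = 1
+ and *sse_nregs = 1 and returns 1, i.e. the argument fits in registers. */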
+
+/* Construct a container for the argument used by the GCC interface. See
+ FUNCTION_ARG for the detailed description. */
+
+static rtx
+construct_container (enum machine_mode mode, enum machine_mode orig_mode,
+ tree type, int in_return, int nintregs, int nsseregs,
+ const int *intreg, int sse_regno)
+{
+ /* These static variables track which errors have already been issued. */
+ static bool issued_sse_arg_error;
+ static bool issued_sse_ret_error;
+ static bool issued_x87_ret_error;
+
+ enum machine_mode tmpmode;
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ enum x86_64_reg_class class[MAX_CLASSES];
+ int n;
+ int i;
+ int nexps = 0;
+ int needed_sseregs, needed_intregs;
+ rtx exp[MAX_CLASSES];
+ rtx ret;
+
+ n = classify_argument (mode, type, class, 0);
+ if (TARGET_DEBUG_ARG)
+ {
+ if (!n)
+ fprintf (stderr, "Memory class\n");
+ else
+ {
+ fprintf (stderr, "Classes:");
+ for (i = 0; i < n; i++)
+ {
+ fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
+ }
+ fprintf (stderr, "\n");
+ }
+ }
+ if (!n)
+ return NULL;
+ if (!examine_argument (mode, type, in_return, &needed_intregs,
+ &needed_sseregs))
+ return NULL;
+ if (needed_intregs > nintregs || needed_sseregs > nsseregs)
+ return NULL;
+
+ /* We allowed the user to turn off SSE for kernel mode. Don't crash if
+ some less clueful developer tries to use floating-point anyway. */
+ if (needed_sseregs && !TARGET_SSE)
+ {
+ if (in_return)
+ {
+ if (!issued_sse_ret_error)
+ {
+ error ("SSE register return with SSE disabled");
+ issued_sse_ret_error = true;
+ }
+ }
+ else if (!issued_sse_arg_error)
+ {
+ error ("SSE register argument with SSE disabled");
+ issued_sse_arg_error = true;
+ }
+ return NULL;
+ }
+
+ /* Likewise, error if the ABI requires us to return values in the
+ x87 registers and the user specified -mno-80387. */
+ if (!TARGET_80387 && in_return)
+ for (i = 0; i < n; i++)
+ if (class[i] == X86_64_X87_CLASS
+ || class[i] == X86_64_X87UP_CLASS
+ || class[i] == X86_64_COMPLEX_X87_CLASS)
+ {
+ if (!issued_x87_ret_error)
+ {
+ error ("x87 register return with x87 disabled");
+ issued_x87_ret_error = true;
+ }
+ return NULL;
+ }
+
+ /* First construct the simple cases. Avoid SCmode, since we want to use
+ a single register to pass this type. */
+ if (n == 1 && mode != SCmode)
+ switch (class[0])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ return gen_rtx_REG (mode, intreg[0]);
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
+ case X86_64_X87_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return gen_rtx_REG (mode, FIRST_STACK_REG);
+ case X86_64_NO_CLASS:
+ /* Zero sized array, struct or class. */
+ return NULL;
+ default:
+ gcc_unreachable ();
+ }
+ if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+ if (n == 2
+ && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
+ return gen_rtx_REG (XFmode, FIRST_STACK_REG);
+ if (n == 2 && class[0] == X86_64_INTEGER_CLASS
+ && class[1] == X86_64_INTEGER_CLASS
+ && (mode == CDImode || mode == TImode || mode == TFmode)
+ && intreg[0] + 1 == intreg[1])
+ return gen_rtx_REG (mode, intreg[0]);
+
+ /* Otherwise figure out the entries of the PARALLEL. */
+ for (i = 0; i < n; i++)
+ {
+ switch (class[i])
+ {
+ case X86_64_NO_CLASS:
+ break;
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ /* Merge TImodes on aligned occasions here too. */
+ if (i * 8 + 8 > bytes)
+ tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
+ else if (class[i] == X86_64_INTEGERSI_CLASS)
+ tmpmode = SImode;
+ else
+ tmpmode = DImode;
+ /* We've requested a size we don't have a mode for. Use DImode. */
+ if (tmpmode == BLKmode)
+ tmpmode = DImode;
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode, *intreg),
+ GEN_INT (i*8));
+ intreg++;
+ break;
+ case X86_64_SSESF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSEDF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSE_CLASS:
+ if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
+ tmpmode = TImode;
+ else
+ tmpmode = DImode;
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ if (tmpmode == TImode)
+ i++;
+ sse_regno++;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Empty aligned struct, union or class. */
+ if (nexps == 0)
+ return NULL;
+
+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
+ for (i = 0; i < nexps; i++)
+ XVECEXP (ret, 0, i) = exp [i];
+ return ret;
+}
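+
+/* Continuing the example (illustrative): assuming struct { double d;
+ int i; } is the first argument of a 64-bit call, construct_container
+ builds, schematically,
+ (parallel [(expr_list (reg:DF xmm0) (const_int 0))
+ (expr_list (reg:DI di) (const_int 8))])
+ placing the double in %xmm0 and the int in %rdi. */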
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+void
+function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int named)
+{
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (type)
+ mode = type_natural_mode (type);
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
+ "mode=%s, named=%d)\n\n",
+ words, cum->words, cum->nregs, cum->sse_nregs,
+ GET_MODE_NAME (mode), named);
+
+ if (TARGET_64BIT)
+ {
+ int int_nregs, sse_nregs;
+ if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
+ cum->words += words;
+ else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
+ {
+ cum->nregs -= int_nregs;
+ cum->sse_nregs -= sse_nregs;
+ cum->regno += int_nregs;
+ cum->sse_regno += sse_nregs;
+ }
+ else
+ cum->words += words;
+ }
+ else
+ {
+ switch (mode)
+ {
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ cum->words += words;
+ cum->nregs -= words;
+ cum->regno += words;
+
+ if (cum->nregs <= 0)
+ {
+ cum->nregs = 0;
+ cum->regno = 0;
+ }
+ break;
+
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
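+ /* FALLTHRU */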
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+
+ case TImode:
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->sse_words += words;
+ cum->sse_nregs -= 1;
+ cum->sse_regno += 1;
+ if (cum->sse_nregs <= 0)
+ {
+ cum->sse_nregs = 0;
+ cum->sse_regno = 0;
+ }
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->mmx_words += words;
+ cum->mmx_nregs -= 1;
+ cum->mmx_regno += 1;
+ if (cum->mmx_nregs <= 0)
+ {
+ cum->mmx_nregs = 0;
+ cum->mmx_regno = 0;
+ }
+ }
+ break;
+ }
+ }
+}
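+
+/* E.g. (illustrative): on x86-64, advancing over a named double consumes
+ one SSE register (cum->sse_nregs drops by one, cum->sse_regno rises by
+ one); once registers are exhausted, only cum->words grows. */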
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+rtx
+function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
+ tree type, int named)
+{
+ enum machine_mode mode = orig_mode;
+ rtx ret = NULL_RTX;
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ static bool warnedsse, warnedmmx;
+
+ /* To simplify the code below, represent vector types with a vector mode
+ even if MMX/SSE are not active. */
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ mode = type_natural_mode (type);
+
+ /* Handle a hidden AL argument containing the number of SSE registers
+ used by varargs x86-64 functions. For the i386 ABI, just return
+ constm1_rtx to avoid any AL settings. */
+ if (mode == VOIDmode)
+ {
+ if (TARGET_64BIT)
+ return GEN_INT (cum->maybe_vaarg
+ ? (cum->sse_nregs < 0
+ ? SSE_REGPARM_MAX
+ : cum->sse_regno)
+ : -1);
+ else
+ return constm1_rtx;
+ }
+ if (TARGET_64BIT)
+ ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
+ cum->sse_nregs,
+ &x86_64_int_parameter_registers [cum->regno],
+ cum->sse_regno);
+ else
+ switch (mode)
+ {
+ /* For now, pass fp/complex values on the stack. */
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ if (words <= cum->nregs)
+ {
+ int regno = cum->regno;
+
+ /* Fastcall allocates the first two DWORD (SImode) or
+ smaller arguments to ECX and EDX. */
+ if (cum->fastcall)
+ {
+ if (mode == BLKmode || mode == DImode)
+ break;
+
+ /* ECX, not EAX, is the first allocated register. */
+ if (regno == 0)
+ regno = 2;
+ }
+ ret = gen_rtx_REG (mode, regno);
+ }
+ break;
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
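+ /* FALLTHRU */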
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+ case TImode:
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_SSE && !warnedsse && cum->warn_sse)
+ {
+ warnedsse = true;
+ warning (0, "SSE vector argument without SSE enabled "
+ "changes the ABI");
+ }
+ if (cum->sse_nregs)
+ ret = gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector argument without MMX enabled "
+ "changes the ABI");
+ }
+ if (cum->mmx_nregs)
+ ret = gen_reg_or_parallel (mode, orig_mode,
+ cum->mmx_regno + FIRST_MMX_REG);
+ }
+ break;
+ }
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr,
+ "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
+ words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
+
+ if (ret)
+ print_simple_rtl (stderr, ret);
+ else
+ fprintf (stderr, ", stack");
+
+ fprintf (stderr, " )\n");
+ }
+
+ return ret;
+}
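+
+/* E.g. (illustrative): the first named int argument of a 64-bit function
+ yields (reg:SI di) via construct_container; for a 32-bit fastcall
+ function the same argument yields (reg:SI cx), per the code above. */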
+
+/* Return true when an argument must be passed by reference. If true
+ for an argument, a copy of that argument is made in memory and a
+ pointer to the argument is passed instead of the argument itself.
+ The pointer is passed in whatever way is appropriate for passing a
+ pointer to that type. */
+
+static bool
+ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (!TARGET_64BIT)
+ return 0;
+
+ if (type && int_size_in_bytes (type) == -1)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return true when TYPE should be 128-bit aligned for the 32-bit
+ argument-passing ABI. Only called if TARGET_SSE. */
+static bool
+contains_128bit_aligned_vector_p (tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+ if (SSE_REG_MODE_P (mode)
+ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
+ return true;
+ if (TYPE_ALIGN (type) < 128)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Walk the aggregates recursively. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ tree field;
+
+ if (TYPE_BINFO (type))
+ {
+ tree binfo, base_binfo;
+ int i;
+
+ for (binfo = TYPE_BINFO (type), i = 0;
+ BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
+ if (contains_128bit_aligned_vector_p
+ (BINFO_TYPE (base_binfo)))
+ return true;
+ }
+ /* And now walk the fields of the structure. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
+ return true;
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ /* Just in case some language passes arrays by value. */
+ if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
+ return true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return false;
+}
+
+/* Gives the alignment boundary, in bits, of an argument with the
+ specified mode and type. */
+
+int
+ix86_function_arg_boundary (enum machine_mode mode, tree type)
+{
+ int align;
+ if (type)
+ align = TYPE_ALIGN (type);
+ else
+ align = GET_MODE_ALIGNMENT (mode);
+ /* APPLE LOCAL begin unbreak ppc64 abi 5103220 */
+ if (type && integer_zerop (TYPE_SIZE (type)))
+ align = PARM_BOUNDARY;
+ /* APPLE LOCAL end unbreak ppc64 abi 5103220 */
+ if (align < PARM_BOUNDARY)
+ align = PARM_BOUNDARY;
+ if (!TARGET_64BIT)
+ {
+ /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
+ make an exception for SSE modes since these require 128-bit
+ alignment.
+
+ The handling here differs from field_alignment. ICC aligns MMX
+ arguments to 4-byte boundaries, while structure fields are aligned
+ to 8-byte boundaries. */
+ if (!TARGET_SSE)
+ align = PARM_BOUNDARY;
+ else if (!type)
+ {
+ if (!SSE_REG_MODE_P (mode))
+ align = PARM_BOUNDARY;
+ }
+ else
+ {
+ if (!contains_128bit_aligned_vector_p (type))
+ align = PARM_BOUNDARY;
+ }
+ }
+ if (align > 128)
+ align = 128;
+ return align;
+}
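+
+/* E.g. (illustrative): on ia32 a plain double is demoted to PARM_BOUNDARY
+ (32 bits), while an __m128 argument, or a struct containing one, keeps
+ the full 128-bit boundary. */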
+
+/* Return true if REGNO is a possible register number for a function value. */
+bool
+ix86_function_value_regno_p (int regno)
+{
+ if (TARGET_MACHO)
+ {
+ if (!TARGET_64BIT)
+ {
+ return ((regno) == 0
+ || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
+ || ((regno) == FIRST_SSE_REG && TARGET_SSE));
+ }
+ return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
+ || ((regno) == FIRST_SSE_REG && TARGET_SSE)
+ || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
+ }
+ else
+ {
+ if (regno == 0
+ || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
+ || (regno == FIRST_SSE_REG && TARGET_SSE))
+ return true;
+
+ if (!TARGET_64BIT
+ && (regno == FIRST_MMX_REG && TARGET_MMX))
+ return true;
+
+ return false;
+ }
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FNTYPE_OR_DECL is its
+ FUNCTION_DECL; otherwise it is the function's type or 0. */
+rtx
+ix86_function_value (tree valtype, tree fntype_or_decl,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode natmode = type_natural_mode (valtype);
+
+ if (TARGET_64BIT)
+ {
+ rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
+ 1, REGPARM_MAX, SSE_REGPARM_MAX,
+ x86_64_int_return_registers, 0);
+ /* For zero-sized structures, construct_container returns NULL, but we
+ need to keep the rest of the compiler happy by returning a meaningful
+ value. */
+ if (!ret)
+ ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
+ return ret;
+ }
+ else
+ {
+ tree fn = NULL_TREE, fntype;
+ if (fntype_or_decl
+ && DECL_P (fntype_or_decl))
+ fn = fntype_or_decl;
+ fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
+ return gen_rtx_REG (TYPE_MODE (valtype),
+ ix86_value_regno (natmode, fn, fntype));
+ }
+}
+
+/* APPLE LOCAL begin radar 4781080 */
+/* Return true iff we must generate a call to objcMsgSend for an
+ fp-returning method. */
+bool
+ix86_objc_fpreturn_msgcall (tree ret_type, bool no_long_double)
+{
+ if (no_long_double)
+ return TARGET_64BIT && SCALAR_FLOAT_TYPE_P (ret_type)
+ && TYPE_MODE (ret_type) != XFmode;
+ else
+ return SCALAR_FLOAT_TYPE_P (ret_type);
+}
+/* APPLE LOCAL end radar 4781080 */
+
+/* Return true iff TYPE is returned in memory. */
+int
+ix86_return_in_memory (tree type)
+{
+ int needed_intregs, needed_sseregs, size;
+ enum machine_mode mode = type_natural_mode (type);
+
+ if (TARGET_64BIT)
+ return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+
+ if (mode == BLKmode)
+ return 1;
+
+ size = int_size_in_bytes (type);
+
+ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
+ return 0;
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ /* User-created vectors small enough to fit in EAX. */
+ if (size < 8)
+ return 0;
+
+ /* MMX/3dNow values are returned in MM0,
+ except when it doesn't exist. */
+ if (size == 8)
+ /* APPLE LOCAL begin radar 4875125. */
+ /* Undo the mainline patch which broke Mach-O ABI compatibility. */
+ return (TARGET_MACHO) ? 1 : (TARGET_MMX ? 0 : 1);
+ /* APPLE LOCAL end radar 4875125. */
+
+ /* SSE values are returned in XMM0, except when it doesn't exist. */
+ if (size == 16)
+ return (TARGET_SSE ? 0 : 1);
+ }
+
+ if (mode == XFmode)
+ return 0;
+
+ if (mode == TDmode)
+ return 1;
+
+ if (size > 12)
+ return 1;
+ return 0;
+}
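+
+/* E.g. (illustrative): on ia32 a 16-byte struct of four ints has BLKmode
+ and is returned in memory, while an __m128 value is returned in %xmm0
+ when SSE is enabled. */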
+
+/* When returning SSE vector types, we have a choice of either
+ (1) being ABI incompatible with a -march switch, or
+ (2) generating an error.
+ Given no good solution, I think the safest thing is one warning.
+ The user won't be able to use -Werror, but....
+
+ Choose the STRUCT_VALUE_RTX hook because that's (at present) only
+ called in response to actually generating a caller or callee that
+ uses such a type. As opposed to RETURN_IN_MEMORY, which is called
+ via aggregate_value_p for general type probing from tree-ssa. */
+
+static rtx
+ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
+{
+ static bool warnedsse, warnedmmx;
+
+ if (type)
+ {
+ /* Look at the return type of the function, not the function type. */
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
+
+ if (!TARGET_SSE && !warnedsse)
+ {
+ if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ {
+ warnedsse = true;
+ warning (0, "SSE vector return without SSE enabled "
+ "changes the ABI");
+ }
+ }
+
+ if (!TARGET_MMX && !warnedmmx)
+ {
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector return without MMX enabled "
+ "changes the ABI");
+ }
+ }
+ }
+
+ return NULL;
+}
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+rtx
+ix86_libcall_value (enum machine_mode mode)
+{
+ if (TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case SFmode:
+ case SCmode:
+ case DFmode:
+ case DCmode:
+ case TFmode:
+ case SDmode:
+ case DDmode:
+ case TDmode:
+ return gen_rtx_REG (mode, FIRST_SSE_REG);
+ case XFmode:
+ case XCmode:
+ return gen_rtx_REG (mode, FIRST_FLOAT_REG);
+ case TCmode:
+ return NULL;
+ default:
+ return gen_rtx_REG (mode, 0);
+ }
+ }
+ else
+ return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
+}
+
+/* Given a mode, return the register to use for a return value. */
+
+static int
+ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
+ we normally prevent this case when MMX is not available. However,
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ return TARGET_MMX ? FIRST_MMX_REG : 0;
+
+ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
+ we prevent this case when SSE is not available. However, some ABIs
+ may require the result to be returned like integer TImode. */
+ if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ return TARGET_SSE ? FIRST_SSE_REG : 0;
+
+ /* Decimal floating point values can go in %eax, unlike other float modes. */
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return 0;
+
+ /* APPLE LOCAL begin regparmandstackparm */
+ if (SSE_FLOAT_MODE_P(mode)
+ && fntype && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (fntype)))
+ return FIRST_SSE_REG;
+ /* APPLE LOCAL end regparmandstackparm */
+
+ /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
+ if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
+ return 0;
+
+ /* Floating point return values go in %st(0), except for local functions
+ when SSE math is enabled or for functions with the sseregparm attribute. */
+ if ((func || fntype)
+ && (mode == SFmode || mode == DFmode))
+ {
+ int sse_level = ix86_function_sseregparm (fntype, func);
+ if ((sse_level >= 1 && mode == SFmode)
+ || (sse_level == 2 && mode == DFmode))
+ return FIRST_SSE_REG;
+ }
+
+ return FIRST_FLOAT_REG;
+}
+
+/* Create the va_list data type. */
+
+static tree
+ix86_build_builtin_va_list (void)
+{
+ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
+
+ /* For i386 we use a plain pointer to the argument area. */
+ if (!TARGET_64BIT)
+ return build_pointer_type (char_type_node);
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
+ unsigned_type_node);
+ f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
+ unsigned_type_node);
+ f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
+ ptr_type_node);
+ f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
+ ptr_type_node);
+
+ va_list_gpr_counter_field = f_gpr;
+ va_list_fpr_counter_field = f_fpr;
+
+ DECL_FIELD_CONTEXT (f_gpr) = record;
+ DECL_FIELD_CONTEXT (f_fpr) = record;
+ DECL_FIELD_CONTEXT (f_ovf) = record;
+ DECL_FIELD_CONTEXT (f_sav) = record;
+
+ TREE_CHAIN (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_gpr;
+ TREE_CHAIN (f_gpr) = f_fpr;
+ TREE_CHAIN (f_fpr) = f_ovf;
+ TREE_CHAIN (f_ovf) = f_sav;
+
+ layout_type (record);
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
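+
+/* The record built above corresponds, roughly, to the familiar x86-64
+ va_list declaration:
+ typedef struct __va_list_tag {
+ unsigned int gp_offset;
+ unsigned int fp_offset;
+ void *overflow_arg_area;
+ void *reg_save_area;
+ } va_list[1]; */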
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS next_cum;
+ rtx save_area = NULL_RTX, mem;
+ rtx label;
+ rtx label_ref;
+ rtx tmp_reg;
+ rtx nsse_reg;
+ int set;
+ tree fntype;
+ int stdarg_p;
+ int i;
+
+ if (!TARGET_64BIT)
+ return;
+
+ if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
+ return;
+
+ /* Indicate that we must allocate space on the stack for the varargs
+ save area. */
+ ix86_save_varrargs_registers = 1;
+
+ cfun->stack_alignment_needed = 128;
+
+ fntype = TREE_TYPE (current_function_decl);
+ stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+ != void_type_node));
+
+ /* For varargs, we do not want to skip the dummy va_dcl argument.
+ For stdargs, we do want to skip the last named argument. */
+ next_cum = *cum;
+ if (stdarg_p)
+ function_arg_advance (&next_cum, mode, type, 1);
+
+ if (!no_rtl)
+ save_area = frame_pointer_rtx;
+
+ set = get_varargs_alias_set ();
+
+ for (i = next_cum.regno;
+ i < ix86_regparm
+ && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ i++)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (save_area, i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (Pmode,
+ x86_64_int_parameter_registers[i]));
+ }
+
+ if (next_cum.sse_nregs && cfun->va_list_fpr_size)
+ {
+ /* Now emit code to save the SSE registers. The AX parameter contains
+ the number of SSE parameter registers used to call this function. We
+ use the sse_prologue_save insn template, which produces a computed
+ jump across the SSE saves. We need some preparation work to get this
+ working. */
+
+ label = gen_label_rtx ();
+ label_ref = gen_rtx_LABEL_REF (Pmode, label);
+
+ /* Compute the address to jump to:
+ label - eax*4 + nnamed_sse_arguments*4 (each save insn is 4 bytes). */
+ tmp_reg = gen_reg_rtx (Pmode);
+ nsse_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_MULT (Pmode, nsse_reg,
+ GEN_INT (4))));
+ if (next_cum.sse_regno)
+ emit_move_insn
+ (nsse_reg,
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ label_ref,
+ GEN_INT (next_cum.sse_regno * 4))));
+ else
+ emit_move_insn (nsse_reg, label_ref);
+ emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
+
+ /* Compute the address of the memory block we save into. We always use
+ a pointer pointing 127 bytes after the first byte to store; this keeps
+ each save instruction within 4 bytes by allowing disp8 addressing. */
+ tmp_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ plus_constant (save_area,
+ 8 * REGPARM_MAX + 127)));
+ mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, BITS_PER_WORD);
+
+ /* And finally do the dirty job! */
+ emit_insn (gen_sse_prologue_save (mem, nsse_reg,
+ GEN_INT (next_cum.sse_regno), label));
+ }
+
+}
+
+/* Implement va_start. */
+
+void
+ix86_va_start (tree valist, rtx nextarg)
+{
+ HOST_WIDE_INT words, n_gpr, n_fpr;
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ tree type;
+
+ /* Only the 64-bit target needs anything special. */
+ if (!TARGET_64BIT)
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ /* Count the number of gp and fp argument registers used. */
+ words = current_function_args_info.words;
+ n_gpr = current_function_args_info.regno;
+ n_fpr = current_function_args_info.sse_regno;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
+ (int) words, (int) n_gpr, (int) n_fpr);
+
+ if (cfun->va_list_gpr_size)
+ {
+ type = TREE_TYPE (gpr);
+ t = build2 (MODIFY_EXPR, type, gpr,
+ build_int_cst (type, n_gpr * 8));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (cfun->va_list_fpr_size)
+ {
+ type = TREE_TYPE (fpr);
+ t = build2 (MODIFY_EXPR, type, fpr,
+ build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the overflow area. */
+ type = TREE_TYPE (ovf);
+ t = make_tree (type, virtual_incoming_args_rtx);
+ if (words != 0)
+ t = build2 (PLUS_EXPR, type, t,
+ build_int_cst (type, words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, type, ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ {
+ /* Find the register save area.
+ The function prologue saves it right above the stack frame. */
+ type = TREE_TYPE (sav);
+ t = make_tree (type, frame_pointer_rtx);
+ t = build2 (MODIFY_EXPR, type, sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
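+
+/* E.g. (illustrative): for int f (int a, ...) one GPR is already used by
+ the named argument, so va_start stores gp_offset = 8 and fp_offset =
+ 8*REGPARM_MAX (48), points overflow_arg_area at the first stack
+ argument, and reg_save_area at the block saved by the prologue. */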
+
+/* Implement va_arg. */
+
+tree
+ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
+{
+ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ int size, rsize;
+ tree lab_false, lab_over = NULL_TREE;
+ tree addr, t2;
+ rtx container;
+ int indirect_p = 0;
+ tree ptrtype;
+ enum machine_mode nat_mode;
+
+ /* Only the 64-bit target needs anything special. */
+ if (!TARGET_64BIT)
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build_va_arg_indirect_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect_p)
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ nat_mode = type_natural_mode (type);
+ container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
+ REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
+
+ /* Pull the value out of the saved registers. */
+
+ addr = create_tmp_var (ptr_type_node, "addr");
+ DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
+
+ if (container)
+ {
+ int needed_intregs, needed_sseregs;
+ bool need_temp;
+ tree int_addr, sse_addr;
+
+ lab_false = create_artificial_label ();
+ lab_over = create_artificial_label ();
+
+ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
+
+ need_temp = (!REG_P (container)
+ && ((needed_intregs && TYPE_ALIGN (type) > 64)
+ || TYPE_ALIGN (type) > 128));
+
+ /* If we are passing a structure, verify that it occupies a consecutive
+ block of the register save area. If not, we need to do moves. */
+ if (!need_temp && !REG_P (container))
+ {
+ /* Verify that all registers are strictly consecutive. */
+ if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 16)
+ need_temp = 1;
+ }
+ }
+ else
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 8)
+ need_temp = 1;
+ }
+ }
+ }
+ if (!need_temp)
+ {
+ int_addr = addr;
+ sse_addr = addr;
+ }
+ else
+ {
+ int_addr = create_tmp_var (ptr_type_node, "int_addr");
+ DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
+ sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
+ DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
+ }
+
+ /* First ensure that we fit completely in registers. */
+ if (needed_intregs)
+ {
+ t = build_int_cst (TREE_TYPE (gpr),
+ (REGPARM_MAX - needed_intregs + 1) * 8);
+ t = build2 (GE_EXPR, boolean_type_node, gpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ t = build_int_cst (TREE_TYPE (fpr),
+ (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
+ + REGPARM_MAX * 8);
+ t = build2 (GE_EXPR, boolean_type_node, fpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+
+ /* Compute the index to the start of the area used for integer regs. */
+ if (needed_intregs)
+ {
+ /* int_addr = gpr + sav; */
+ t = fold_convert (ptr_type_node, gpr);
+ t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
+ t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ /* sse_addr = fpr + sav; */
+ t = fold_convert (ptr_type_node, fpr);
+ t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
+ t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
+ gimplify_and_add (t, pre_p);
+ }
+ if (need_temp)
+ {
+ int i;
+ tree temp = create_tmp_var (type, "va_arg_tmp");
+
+ /* addr = &temp; */
+ t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
+ t = build2 (MODIFY_EXPR, void_type_node, addr, t);
+ gimplify_and_add (t, pre_p);
+
+ for (i = 0; i < XVECLEN (container, 0); i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ rtx reg = XEXP (slot, 0);
+ enum machine_mode mode = GET_MODE (reg);
+ tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
+ tree addr_type = build_pointer_type (piece_type);
+ tree src_addr, src;
+ int src_offset;
+ tree dest_addr, dest;
+
+ if (SSE_REGNO_P (REGNO (reg)))
+ {
+ src_addr = sse_addr;
+ src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
+ }
+ else
+ {
+ src_addr = int_addr;
+ src_offset = REGNO (reg) * 8;
+ }
+ src_addr = fold_convert (addr_type, src_addr);
+ src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
+ size_int (src_offset)));
+ src = build_va_arg_indirect_ref (src_addr);
+
+ dest_addr = fold_convert (addr_type, addr);
+ dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
+ size_int (INTVAL (XEXP (slot, 1)))));
+ dest = build_va_arg_indirect_ref (dest_addr);
+
+ t = build2 (MODIFY_EXPR, void_type_node, dest, src);
+ gimplify_and_add (t, pre_p);
+ }
+ }
+
+ if (needed_intregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
+ build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
+ gimplify_and_add (t, pre_p);
+ }
+
+ t = build1 (GOTO_EXPR, void_type_node, lab_over);
+ gimplify_and_add (t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_false);
+ append_to_statement_list (t, pre_p);
+ }
+
+ /* ... otherwise out of the overflow area. */
+
+ /* Care for on-stack alignment if needed. */
+ if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
+ || integer_zerop (TYPE_SIZE (type)))
+ t = ovf;
+ else
+ {
+ HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
+ t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
+ build_int_cst (TREE_TYPE (ovf), align - 1));
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ build_int_cst (TREE_TYPE (t), -align));
+ }
+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+ t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
+ gimplify_and_add (t2, pre_p);
+
+ t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
+ build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ gimplify_and_add (t, pre_p);
+
+ if (container)
+ {
+ t = build1 (LABEL_EXPR, void_type_node, lab_over);
+ append_to_statement_list (t, pre_p);
+ }
+
+ ptrtype = build_pointer_type (type);
+ addr = fold_convert (ptrtype, addr);
+
+ if (indirect_p)
+ addr = build_va_arg_indirect_ref (addr);
+ return build_va_arg_indirect_ref (addr);
+}
+
+/* Return nonzero if OPNUM's MEM should be matched
+ in movabs* patterns. */
+
+int
+ix86_check_movabs (rtx insn, int opnum)
+{
+ rtx set, mem;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ mem = XEXP (set, opnum);
+ while (GET_CODE (mem) == SUBREG)
+ mem = SUBREG_REG (mem);
+ gcc_assert (GET_CODE (mem) == MEM);
+ return (volatile_ok || !MEM_VOLATILE_P (mem));
+}
+
+/* Initialize the table of extra 80387 mathematical constants. */
+
+static void
+init_ext_80387_constants (void)
+{
+ static const char * cst[5] =
+ {
+ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
+ "0.6931471805599453094286904741849753009", /* 1: fldln2 */
+ "1.4426950408889634073876517827983434472", /* 2: fldl2e */
+ "3.3219280948873623478083405569094566090", /* 3: fldl2t */
+ "3.1415926535897932385128089594061862044", /* 4: fldpi */
+ };
+ int i;
+
+ for (i = 0; i < 5; i++)
+ {
+ real_from_string (&ext_80387_constants_table[i], cst[i]);
+ /* Ensure each constant is rounded to XFmode precision. */
+ real_convert (&ext_80387_constants_table[i],
+ XFmode, &ext_80387_constants_table[i]);
+ }
+
+ ext_80387_constants_init = 1;
+}
+
+/* Return an index > 0 if the constant is something that can be loaded with
+ a special instruction, 0 if it is not, or -1 if X is not a floating-point
+ CONST_DOUBLE. */
+
+int
+standard_80387_constant_p (rtx x)
+{
+ if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
+ return -1;
+
+ if (x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (x == CONST1_RTX (GET_MODE (x)))
+ return 2;
+
+ /* For XFmode constants, try to find a special 80387 instruction when
+ optimizing for size or on those CPUs that benefit from them. */
+ if (GET_MODE (x) == XFmode
+ && (optimize_size || x86_ext_80387_constants & TUNEMASK))
+ {
+ REAL_VALUE_TYPE r;
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ for (i = 0; i < 5; i++)
+ if (real_identical (&r, &ext_80387_constants_table[i]))
+ return i + 3;
+ }
+
+ return 0;
+}
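+
+/* E.g. (illustrative): CONST0_RTX (DFmode) yields 1 ("fldz" below) and an
+ XFmode constant equal to pi yields 7 ("fldpi"). */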
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_80387_constant_opcode (rtx x)
+{
+ switch (standard_80387_constant_p (x))
+ {
+ case 1:
+ return "fldz";
+ case 2:
+ return "fld1";
+ case 3:
+ return "fldlg2";
+ case 4:
+ return "fldln2";
+ case 5:
+ return "fldl2e";
+ case 6:
+ return "fldl2t";
+ case 7:
+ return "fldpi";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the CONST_DOUBLE representing the 80387 constant that is
+ loaded by the specified special instruction. The argument IDX
+ matches the return value from standard_80387_constant_p. */
+
+rtx
+standard_80387_constant_rtx (int idx)
+{
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ switch (idx)
+ {
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ i = idx - 3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
+ XFmode);
+}
+
+/* Return 1 if MODE is a valid mode for SSE. */
+static int
+standard_sse_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return 1 if X is an FP constant we can load into an SSE register
+ without using memory. */
+int
+standard_sse_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (vector_all_ones_operand (x, mode)
+ && standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -1;
+
+ return 0;
+}
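+
+/* E.g. (illustrative): CONST0_RTX (V4SFmode) yields 1 and is cleared with
+ xorps/pxor below; an all-ones V4SImode vector yields 2 on SSE2 (and -1
+ otherwise) and is materialized with pcmpeqd. */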
+
+/* Return the opcode of the special instruction to be used to load
+ the SSE constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else if (get_attr_mode (insn) == MODE_V2DF)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 2:
+ return "pcmpeqd\t%0, %0";
+ }
+ gcc_unreachable ();
+}
+
+/* Return 1 if OP contains a symbol reference. */
+
+int
+symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return 1 if it is appropriate to emit `ret' instructions in the
+ body of a function. Do this only if the epilogue is simple, needing a
+ couple of insns. Prior to reloading, we can't tell how many registers
+ must be saved, so return 0 then. Return 0 if there is no frame
+ marker to de-allocate. */
+
+int
+ix86_can_use_return_insn_p (void)
+{
+ struct ix86_frame frame;
+
+ if (! reload_completed || frame_pointer_needed)
+ return 0;
+
+ /* Don't allow more than 32k of popped args, since that's all we can
+ do with one instruction. */
+ if (current_function_pops_args
+ && current_function_args_size >= 32768)
+ return 0;
+
+ ix86_compute_frame_layout (&frame);
+ return frame.to_allocate == 0 && frame.nregs == 0;
+}
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may
+ be accessed via the stack pointer) in functions that seem suitable. */
+
+int
+ix86_frame_pointer_required (void)
+{
+ /* If we accessed previous frames, then the generated code expects
+ to be able to access the saved ebp value in our frame. */
+ if (cfun->machine->accesses_prev_frame)
+ return 1;
+
+ /* Several x86 OSes need a frame pointer for other reasons,
+ usually pertaining to setjmp. */
+ if (SUBTARGET_FRAME_POINTER_REQUIRED)
+ return 1;
+
+ /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
+ the frame pointer by default. Turn it back on now if we do not
+ have a leaf function. */
+ if (TARGET_OMIT_LEAF_FRAME_POINTER
+ && (!current_function_is_leaf
+ || ix86_current_function_calls_tls_descriptor))
+ return 1;
+
+ if (current_function_profile)
+ return 1;
+
+ return 0;
+}
+
+/* Record that the current function accesses previous call frames. */
+
+void
+ix86_setup_frame_addresses (void)
+{
+ cfun->machine->accesses_prev_frame = 1;
+}
+
+#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# define USE_HIDDEN_LINKONCE 1
+#else
+# define USE_HIDDEN_LINKONCE 0
+#endif
+
+/* APPLE LOCAL 5695218 */
+static GTY(()) int pic_labels_used;
+
+/* Fills in the label name that should be used for a pc thunk for
+ the given register. */
+
+static void
+get_pc_thunk_name (char name[32], unsigned int regno)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* APPLE LOCAL deep branch prediction pic-base. */
+ if (USE_HIDDEN_LINKONCE || TARGET_MACHO)
+ sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
+ else
+ ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
+}
+
+
+/* Emit the pc thunks used for -fpic; each one loads its register with
+ the return address of the caller and then returns. */
+
+void
+ix86_file_end (void)
+{
+ rtx xops[2];
+ int regno;
+
+ for (regno = 0; regno < 8; ++regno)
+ {
+ char name[32];
+
+ if (! ((pic_labels_used >> regno) & 1))
+ continue;
+
+ get_pc_thunk_name (name, regno);
+
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ switch_to_section (darwin_sections[text_coal_section]);
+ fputs ("\t.weak_definition\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n\t.private_extern\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+ else
+#endif
+ if (USE_HIDDEN_LINKONCE)
+ {
+ tree decl;
+
+ decl = build_decl (FUNCTION_DECL, get_identifier (name),
+ error_mark_node);
+ TREE_PUBLIC (decl) = 1;
+ TREE_STATIC (decl) = 1;
+ DECL_ONE_ONLY (decl) = 1;
+
+ (*targetm.asm_out.unique_section) (decl, 0);
+ switch_to_section (get_named_section (decl, NULL, 0));
+
+ (*targetm.asm_out.globalize_label) (asm_out_file, name);
+ fputs ("\t.hidden\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputc ('\n', asm_out_file);
+ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
+ }
+ /* APPLE LOCAL begin deep branch prediction pic-base */
+#if TARGET_MACHO
+ else if (TARGET_MACHO)
+ {
+ switch_to_section (darwin_sections[text_coal_section]);
+ fputs (".weak_definition\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n.private_extern\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+#endif
+ /* APPLE LOCAL end deep branch prediction pic-base */
+ else
+ {
+ switch_to_section (text_section);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+
+ xops[0] = gen_rtx_REG (SImode, regno);
+ xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
+ output_asm_insn ("ret", xops);
+ }
+
+ if (NEED_INDICATE_EXEC_STACK)
+ file_end_indicate_exec_stack ();
+}
+
+/* Emit code for the SET_GOT patterns. */
+
+const char *
+output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
+{
+ rtx xops[3];
+
+ xops[0] = dest;
+ xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+
+ if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+ {
+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
+
+ if (!flag_pic)
+ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
+ else
+ /* APPLE LOCAL begin dwarf call/pop 5221468 */
+ {
+ output_asm_insn ("call\t%a2", xops);
+
+ /* If necessary, report the effect that the instruction has on
+ the unwind info. */
+#if defined (DWARF2_UNWIND_INFO)
+ if (flag_asynchronous_unwind_tables
+#if !defined (HAVE_prologue)
+ && !ACCUMULATE_OUTGOING_ARGS
+#endif
+ && dwarf2out_do_frame ())
+ {
+ rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (-4)));
+ insn = make_insn_raw (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ }
+#endif
+ }
+ /* APPLE LOCAL end dwarf call/pop 5221468 */
+
+#if TARGET_MACHO
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+#endif
+
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
+
+ if (flag_pic)
+ /* APPLE LOCAL begin dwarf call/pop 5221468 */
+ {
+ output_asm_insn ("pop{l}\t%0", xops);
+
+ /* If necessary, report the effect that the instruction has on
+ the unwind info. We've already done this for delay slots
+ and call instructions. */
+#if defined (DWARF2_UNWIND_INFO)
+ if (flag_asynchronous_unwind_tables
+#if !defined (HAVE_prologue)
+ && !ACCUMULATE_OUTGOING_ARGS
+#endif
+ && dwarf2out_do_frame ())
+ {
+ rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (4)));
+ insn = make_insn_raw (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ }
+#endif
+ }
+ /* APPLE LOCAL end dwarf call/pop 5221468 */
+ }
+ else
+ {
+ char name[32];
+ get_pc_thunk_name (name, REGNO (dest));
+ pic_labels_used |= 1 << REGNO (dest);
+
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ xops[2] = gen_rtx_MEM (QImode, xops[2]);
+ output_asm_insn ("call\t%X2", xops);
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+#if TARGET_MACHO
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+ else
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+#endif
+ }
+
+ if (TARGET_MACHO)
+ return "";
+
+ if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
+ else
+ output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+
+ return "";
+}
+
+/* Generate an "push" pattern for input ARG. */
+
+static rtx
+gen_push (rtx arg)
+{
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ arg);
+}
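+
+/* For example, on ia32 gen_push (eax) builds the RTL
+     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))
+   which is the store form matched by the push instruction patterns
+   (a sketch; the mode is Pmode in general).  */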
+
+/* Return the number of an unused call-clobbered register available
+   for the entire function, or INVALID_REGNUM if there is none. */
+
+static unsigned int
+ix86_select_alt_pic_regnum (void)
+{
+ if (current_function_is_leaf && !current_function_profile
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ int i;
+ for (i = 2; i >= 0; --i)
+ if (!regs_ever_live[i])
+ return i;
+ }
+
+ return INVALID_REGNUM;
+}
+
+/* APPLE LOCAL begin 5695218 */
+/* Reload may introduce references to the PIC base register
+   that do not directly reference pic_offset_table_rtx.  In the
+   rare event we choose an alternate PIC register, run through
+   the insns and rewrite every reference to the original
+   PIC_OFFSET_TABLE_REGNUM to our new one. */
+static void
+ix86_globally_replace_pic_reg (unsigned int new_pic_regno)
+{
+ rtx insn;
+ const int nregs = PIC_OFFSET_TABLE_REGNUM + 1;
+ rtx reg_map[FIRST_PSEUDO_REGISTER];
+ memset (reg_map, 0, nregs * sizeof (rtx));
+ pic_offset_table_rtx = gen_rtx_REG (SImode, new_pic_regno);
+ reg_map[REAL_PIC_OFFSET_TABLE_REGNUM] = pic_offset_table_rtx;
+
+ push_topmost_sequence ();
+ for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
+ {
+ replace_regs (PATTERN (insn), reg_map, nregs, 1);
+ replace_regs (REG_NOTES (insn), reg_map, nregs, 1);
+ }
+#if defined (TARGET_TOC)
+ else if (GET_CODE (insn) == CALL_INSN)
+ {
+ if ( !SIBLING_CALL_P (insn))
+ abort ();
+ }
+#endif
+ }
+ pop_topmost_sequence ();
+
+ regs_ever_live[new_pic_regno] = 1;
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 0;
+#if defined (TARGET_TOC)
+ cfun->machine->substitute_pic_base_reg = new_pic_regno;
+#endif
+}
+/* APPLE LOCAL end 5695218 */
+
+/* Return 1 if we need to save REGNO. */
+static int
+ix86_save_reg (unsigned int regno, int maybe_eh_return)
+{
+ /* APPLE LOCAL begin CW asm blocks */
+  /* For an asm function, we don't save any registers; instead, the
+     user is responsible. */
+ if (cfun->iasm_asm_function)
+ return 0;
+ /* APPLE LOCAL end CW asm blocks */
+
+ if (pic_offset_table_rtx
+ && regno == REAL_PIC_OFFSET_TABLE_REGNUM
+ /* APPLE LOCAL begin 5695218 */
+ && (current_function_uses_pic_offset_table
+ || current_function_profile
+ || current_function_calls_eh_return
+ || current_function_uses_const_pool))
+ /* APPLE LOCAL end 5695218 */
+ {
+ if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
+ return 0;
+ return 1;
+ }
+
+ if (current_function_calls_eh_return && maybe_eh_return)
+ {
+ unsigned i;
+ for (i = 0; ; i++)
+ {
+ unsigned test = EH_RETURN_DATA_REGNO (i);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == regno)
+ return 1;
+ }
+ }
+
+ if (cfun->machine->force_align_arg_pointer
+ && regno == REGNO (cfun->machine->force_align_arg_pointer))
+ return 1;
+
+ /* APPLE LOCAL begin 5695218 */
+ /* In order to get accurate usage info for the PIC register, we've
+ been forced to break and un-break the call_used_regs and
+ fixed_regs vectors. Ignore them when considering the PIC
+ register. */
+ if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
+ && regs_ever_live[regno])
+ return 1;
+ /* APPLE LOCAL end 5695218 */
+
+ return (regs_ever_live[regno]
+ && !call_used_regs[regno]
+ && !fixed_regs[regno]
+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
+}
+
+/* Return number of registers to be saved on the stack. */
+
+static int
+ix86_nsaved_regs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
+ if (ix86_save_reg (regno, true))
+ nregs++;
+ return nregs;
+}
+
+/* Return the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+HOST_WIDE_INT
+ix86_initial_elimination_offset (int from, int to)
+{
+ struct ix86_frame frame;
+ ix86_compute_frame_layout (&frame);
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset;
+ else if (from == FRAME_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
+ else
+ {
+ gcc_assert (to == STACK_POINTER_REGNUM);
+
+ if (from == ARG_POINTER_REGNUM)
+ return frame.stack_pointer_offset;
+
+ gcc_assert (from == FRAME_POINTER_REGNUM);
+ return frame.stack_pointer_offset - frame.frame_pointer_offset;
+ }
+}
+
+/* Fill the ix86_frame structure describing the frame of the currently
+   compiled function. */
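+/* From higher to lower addresses, the layout computed below is
+   roughly: return address, saved %ebp (if frame_pointer_needed),
+   register save area, va-arg area, padding1, local variables,
+   outgoing arguments area, padding2; red-zone handling may then
+   shrink the portion the prologue actually allocates.  */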
+
+static void
+ix86_compute_frame_layout (struct ix86_frame *frame)
+{
+ HOST_WIDE_INT total_size;
+ unsigned int stack_alignment_needed;
+ HOST_WIDE_INT offset;
+ unsigned int preferred_alignment;
+ HOST_WIDE_INT size = get_frame_size ();
+
+ frame->nregs = ix86_nsaved_regs ();
+ total_size = size;
+
+ stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
+ preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
+
+  /* During reload iteration the number of registers saved can change.
+     Recompute the value as needed.  Do not recompute when the number
+     of registers didn't change, as reload makes multiple calls to this
+     function and does not expect the decision to change within a
+     single iteration. */
+ if (!optimize_size
+ && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
+ {
+ int count = frame->nregs;
+
+ cfun->machine->use_fast_prologue_epilogue_nregs = count;
+ /* The fast prologue uses move instead of push to save registers. This
+ is significantly longer, but also executes faster as modern hardware
+ can execute the moves in parallel, but can't do that for push/pop.
+
+         Be careful about choosing which prologue to emit: when the
+         function takes many instructions to execute, we may use the
+         slow version, as we also do when the function is known to be
+         outside a hot spot (known only with feedback).  Weight the size
+         of the function by the number of registers to save, as it is
+         cheap to use one or two push instructions but very slow to use
+         many of them. */
+ if (count)
+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
+ if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ || (flag_branch_probabilities
+ && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ cfun->machine->use_fast_prologue_epilogue = false;
+ else
+ cfun->machine->use_fast_prologue_epilogue
+ = !expensive_function_p (count);
+ }
+ if (TARGET_PROLOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue)
+ frame->save_regs_using_mov = true;
+ else
+ frame->save_regs_using_mov = false;
+
+
+ /* Skip return address and saved base pointer. */
+ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
+
+ frame->hard_frame_pointer_offset = offset;
+
+  /* Do some sanity checking of stack_alignment_needed and
+     preferred_alignment, since the i386 port is the only one using
+     these features, and they may break easily. */
+
+ gcc_assert (!size || stack_alignment_needed);
+ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (stack_alignment_needed
+ <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+
+ if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
+ stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
+
+ /* Register save area */
+ offset += frame->nregs * UNITS_PER_WORD;
+
+ /* Va-arg area */
+ if (ix86_save_varrargs_registers)
+ {
+ offset += X86_64_VARARGS_SIZE;
+ frame->va_arg_size = X86_64_VARARGS_SIZE;
+ }
+ else
+ frame->va_arg_size = 0;
+
+ /* Align start of frame for local function. */
+ frame->padding1 = ((offset + stack_alignment_needed - 1)
+ & -stack_alignment_needed) - offset;
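+  /* E.g. with offset == 20 and stack_alignment_needed == 16:
+     ((20 + 15) & -16) - 20 == 32 - 20 == 12 bytes of padding1.  */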
+
+ offset += frame->padding1;
+
+ /* Frame pointer points here. */
+ frame->frame_pointer_offset = offset;
+
+ offset += size;
+
+  /* Add the outgoing arguments area.  This can be skipped if we
+     eliminated all the function calls as dead code.
+     Skipping is however impossible when the function calls alloca:
+     the alloca expander assumes that the last
+     current_function_outgoing_args_size bytes of the stack frame
+     are unused. */
+ if (ACCUMULATE_OUTGOING_ARGS
+ && (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor))
+ {
+ offset += current_function_outgoing_args_size;
+ frame->outgoing_arguments_size = current_function_outgoing_args_size;
+ }
+ else
+ frame->outgoing_arguments_size = 0;
+
+ /* Align stack boundary. Only needed if we're calling another function
+ or using alloca. */
+ if (!current_function_is_leaf || current_function_calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
+ frame->padding2 = ((offset + preferred_alignment - 1)
+ & -preferred_alignment) - offset;
+ else
+ frame->padding2 = 0;
+
+ offset += frame->padding2;
+
+ /* We've reached end of stack frame. */
+ frame->stack_pointer_offset = offset;
+
+  /* The size the prologue needs to allocate. */
+ frame->to_allocate =
+ (size + frame->padding1 + frame->padding2
+ + frame->outgoing_arguments_size + frame->va_arg_size);
+
+ if ((!frame->to_allocate && frame->nregs <= 1)
+ || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+ frame->save_regs_using_mov = false;
+
+ if (TARGET_RED_ZONE && current_function_sp_is_unchanging
+ && current_function_is_leaf
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ frame->red_zone_size = frame->to_allocate;
+ if (frame->save_regs_using_mov)
+ frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
+ }
+ else
+ frame->red_zone_size = 0;
+ frame->to_allocate -= frame->red_zone_size;
+ frame->stack_pointer_offset -= frame->red_zone_size;
+#if 0
+ fprintf (stderr, "nregs: %i\n", frame->nregs);
+ fprintf (stderr, "size: %i\n", size);
+ fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
+ fprintf (stderr, "padding1: %i\n", frame->padding1);
+ fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
+ fprintf (stderr, "padding2: %i\n", frame->padding2);
+ fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
+ fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
+ fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
+ fprintf (stderr, "hard_frame_pointer_offset: %i\n",
+ frame->hard_frame_pointer_offset);
+ fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
+#endif
+}
+
+/* Emit code to save registers in the prologue. */
+
+static void
+ix86_emit_save_regs (void)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit code to save registers using MOV insns.  The first register
+   is stored at POINTER + OFFSET. */
+static void
+ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
+ Pmode, offset),
+ gen_rtx_REG (Pmode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += UNITS_PER_WORD;
+ }
+}
+
+/* Expand prologue or epilogue stack adjustment.
+   The pattern exists to put a dependency on all ebp-based memory
+   accesses.  STYLE should be negative if instructions should be marked
+   as frame related, zero if the %r11 register is live and cannot be
+   freely used, and positive otherwise. */
+
+static void
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
+{
+ rtx insn;
+
+ if (! TARGET_64BIT)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+ else if (x86_64_immediate_operand (offset, DImode))
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+ else
+ {
+ rtx r11;
+ /* r11 is used by indirect sibcall return as well, set before the
+ epilogue and used after the epilogue. ATM indirect sibcall
+ shouldn't be used together with huge frame sizes in one
+ function because of the frame_size check in sibcall.c. */
+ gcc_assert (style);
+ r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+ /* APPLE LOCAL async unwind info 5949469 */
+ if (style < 0 /* || flag_asynchronous_unwind_tables*/)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
+ offset));
+ }
+ if (style < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* APPLE LOCAL begin async unwind info 5949350 5949469 */
+#if 0
+ else if (flag_asynchronous_unwind_tables
+ && (src == hard_frame_pointer_rtx
+ || src == stack_pointer_rtx))
+ RTX_FRAME_RELATED_P (insn) = 1;
+#endif
+ /* APPLE LOCAL end async unwind info 5949350 5949469 */
+}
+
+/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
+
+static rtx
+ix86_internal_arg_pointer (void)
+{
+ bool has_force_align_arg_pointer =
+ (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
+ if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+ && DECL_NAME (current_function_decl)
+ && MAIN_NAME_P (DECL_NAME (current_function_decl))
+ && DECL_FILE_SCOPE_P (current_function_decl))
+ || ix86_force_align_arg_pointer
+ || has_force_align_arg_pointer)
+ {
+ /* Nested functions can't realign the stack due to a register
+ conflict. */
+ if (DECL_CONTEXT (current_function_decl)
+ && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
+ {
+ if (ix86_force_align_arg_pointer)
+ warning (0, "-mstackrealign ignored for nested functions");
+ if (has_force_align_arg_pointer)
+ error ("%s not supported for nested functions",
+ ix86_force_align_arg_pointer_string);
+ return virtual_incoming_args_rtx;
+ }
+ cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
+ return copy_to_reg (cfun->machine->force_align_arg_pointer);
+ }
+ else
+ return virtual_incoming_args_rtx;
+}
+
+/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
+ This is called from dwarf2out.c to emit call frame instructions
+ for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
+static void
+ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
+{
+ rtx unspec = SET_SRC (pattern);
+ gcc_assert (GET_CODE (unspec) == UNSPEC);
+
+ switch (index)
+ {
+ case UNSPEC_REG_SAVE:
+ dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
+ SET_DEST (pattern));
+ break;
+ case UNSPEC_DEF_CFA:
+ dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
+ INTVAL (XVECEXP (unspec, 0, 0)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* APPLE LOCAL begin 3399553 */
+/* Calculate the value of FLT_ROUNDS into DEST.
+
+ The rounding mode is in bits 11:10 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
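+
+   Spot-checking all four modes (FPSR bits 11:10):
+     00 -> ((0|0)+1)&3 = 1 (nearest)   01 -> ((0|2)+1)&3 = 3 (-inf)
+     10 -> ((1|0)+1)&3 = 2 (+inf)      11 -> ((1|2)+1)&3 = 0 (zero)
+   which matches the FLT_ROUNDS encoding above.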
+*/
+void
+ix86_expand_flt_rounds (rtx dest)
+{
+ rtx mem = assign_stack_temp (HImode, GET_MODE_SIZE (HImode), 0);
+ rtx temp = gen_reg_rtx (SImode);
+
+ /* Step #1: Read FPSR. Unfortunately, this can only be done into a
+ 16-bit memory location. */
+ emit_insn (gen_x86_fnstcw_1 (mem));
+
+ /* Step #2: Copy into a register. */
+ emit_insn (gen_zero_extendhisi2 (dest, mem));
+
+ /* Step #3: Perform conversion described above. */
+ emit_insn (gen_andsi3 (temp, dest, GEN_INT (0x400)));
+ emit_insn (gen_andsi3 (dest, dest, GEN_INT (0x800)));
+ emit_insn (gen_lshrsi3 (temp, temp, GEN_INT (9)));
+ emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (11)));
+ emit_insn (gen_iorsi3 (dest, dest, temp));
+ emit_insn (gen_addsi3 (dest, dest, const1_rtx));
+ emit_insn (gen_andsi3 (dest, dest, GEN_INT (3)));
+}
+/* APPLE LOCAL end 3399553 */
+
+/* APPLE LOCAL begin fix-and-continue x86 */
+#ifndef TARGET_FIX_AND_CONTINUE
+#define TARGET_FIX_AND_CONTINUE 0
+#endif
+/* APPLE LOCAL end fix-and-continue x86 */
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+ix86_expand_prologue (void)
+{
+ rtx insn;
+ bool pic_reg_used;
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+
+ /* APPLE LOCAL begin fix-and-continue x86 */
+ if (TARGET_FIX_AND_CONTINUE)
+ {
+ /* gdb on darwin arranges to forward a function from the old
+ address by modifying the first 6 instructions of the function
+ to branch to the overriding function. This is necessary to
+ permit function pointers that point to the old function to
+ actually forward to the new function. */
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ }
+ /* APPLE LOCAL end fix-and-continue x86 */
+
+ ix86_compute_frame_layout (&frame);
+
+ if (cfun->machine->force_align_arg_pointer)
+ {
+ rtx x, y;
+
+ /* Grab the argument pointer. */
+ x = plus_constant (stack_pointer_rtx, 4);
+ y = cfun->machine->force_align_arg_pointer;
+ insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+      /* The unwind info consists of two parts: install the fafp as the
+         cfa, and record the fafp as the "save register" of the stack
+         pointer.  The latter is there so that the unwinder can see
+         where it should restore the stack pointer across the `and'
+         insn. */
+ x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
+ x = gen_rtx_SET (VOIDmode, y, x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
+ UNSPEC_REG_SAVE);
+ y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
+ RTX_FRAME_RELATED_P (y) = 1;
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
+ x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
+ REG_NOTES (insn) = x;
+
+ /* Align the stack. */
+ emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-16)));
+
+ /* And here we cheat like madmen with the unwind info. We force the
+ cfa register back to sp+4, which is exactly what it was at the
+ start of the function. Re-pushing the return address results in
+ the return at the same spot relative to the cfa, and thus is
+ correct wrt the unwind info. */
+ x = cfun->machine->force_align_arg_pointer;
+ x = gen_frame_mem (Pmode, plus_constant (x, -4));
+ insn = emit_insn (gen_push (x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ x = GEN_INT (4);
+ x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
+ REG_NOTES (insn) = x;
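+
+      /* Net effect of this block, as a sketch (ia32):
+	   leal  4(%esp), %ecx
+	   andl  $-16, %esp
+	   pushl -4(%ecx)
+	 i.e. remember the incoming argument pointer, realign the
+	 stack, and re-push the return address at the new top of
+	 stack.  */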
+ }
+
+ /* Note: AT&T enter does NOT have reversed args. Enter is probably
+ slower on all targets. Also sdb doesn't like it. */
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ allocate = frame.to_allocate;
+
+ if (!frame.save_regs_using_mov)
+ ix86_emit_save_regs ();
+ else
+ allocate += frame.nregs * UNITS_PER_WORD;
+
+  /* When using the red zone we may start register saving before
+     allocating the stack frame, saving one cycle of the prologue. */
+ if (TARGET_RED_ZONE && frame.save_regs_using_mov)
+ ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
+ : stack_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+
+ if (allocate == 0)
+ ;
+ else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
+ /* APPLE LOCAL begin CW asm blocks */
+ {
+ if (! cfun->iasm_asm_function)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate), -1);
+ }
+ /* APPLE LOCAL end CW asm blocks */
+ else
+ {
+      /* Only valid for Win32: the stack-probing worker expects the
+         allocation size in %eax. */
+ rtx eax = gen_rtx_REG (SImode, 0);
+ bool eax_live = ix86_eax_live_at_start_p ();
+ rtx t;
+
+ gcc_assert (!TARGET_64BIT);
+
+ if (eax_live)
+ {
+ emit_insn (gen_push (eax));
+ allocate -= 4;
+ }
+
+ emit_move_insn (eax, GEN_INT (allocate));
+
+ insn = emit_insn (gen_allocate_stack_worker (eax));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ t, REG_NOTES (insn));
+
+ if (eax_live)
+ {
+ if (frame_pointer_needed)
+ t = plus_constant (hard_frame_pointer_rtx,
+ allocate
+ - frame.to_allocate
+ - frame.nregs * UNITS_PER_WORD);
+ else
+ t = plus_constant (stack_pointer_rtx, allocate);
+ emit_move_insn (eax, gen_rtx_MEM (SImode, t));
+ }
+ }
+
+ if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
+ {
+ if (!frame_pointer_needed || !frame.to_allocate)
+ ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+ else
+ ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+ }
+
+ pic_reg_used = false;
+ /* APPLE LOCAL begin 5695218 */
+ if (pic_offset_table_rtx && regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
+ && !TARGET_64BIT)
+ {
+ unsigned int alt_pic_reg_used;
+
+ alt_pic_reg_used = ix86_select_alt_pic_regnum ();
+ /* APPLE LOCAL end 5695218 */
+
+ if (alt_pic_reg_used != INVALID_REGNUM)
+ /* APPLE LOCAL begin 5695218 */
+ /* REGNO (pic_offset_table_rtx) = alt_pic_reg_used; */
+ ix86_globally_replace_pic_reg (alt_pic_reg_used);
+ /* APPLE LOCAL end 5695218 */
+
+ pic_reg_used = true;
+ }
+
+ if (pic_reg_used)
+ {
+ if (TARGET_64BIT)
+ insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+ else
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+
+      /* Even with accurate pre-reload life analysis, we can wind up
+	 deleting all references to the pic register after reload.
+	 Consider the case where cross-jumping unifies two sides of a
+	 branch controlled by a comparison vs the only read from a
+	 global.  In that case, allow the set_got to be deleted, though
+	 we're too late to do anything about the ebx save in the
+	 prologue. */
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
+ }
+
+  /* Prevent function calls from being scheduled before the call to
+     mcount.  In the pic_reg_used case, make sure that the GOT load
+     isn't deleted. */
+ if (current_function_profile)
+ emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
+}
+
+/* Emit code to restore saved registers using MOV insns. First register
+ is restored from POINTER + OFFSET. */
+static void
+ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+ int maybe_eh_return)
+{
+ int regno;
+ rtx base_address = gen_rtx_MEM (Pmode, pointer);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, maybe_eh_return))
+ {
+	/* Ensure that adjust_address won't be forced to produce a
+	   pointer outside the range allowed by the x86-64 instruction
+	   set. */
+ if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
+ {
+ rtx r11;
+
+ r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ emit_move_insn (r11, GEN_INT (offset));
+ emit_insn (gen_adddi3 (r11, r11, pointer));
+ base_address = gen_rtx_MEM (Pmode, r11);
+ offset = 0;
+ }
+ emit_move_insn (gen_rtx_REG (Pmode, regno),
+ adjust_address (base_address, Pmode, offset));
+ offset += UNITS_PER_WORD;
+ }
+}
+
+/* Restore function stack, frame, and registers. */
+
+void
+ix86_expand_epilogue (int style)
+{
+ int regno;
+ int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
+ struct ix86_frame frame;
+ HOST_WIDE_INT offset;
+
+ ix86_compute_frame_layout (&frame);
+
+ /* Calculate start of saved registers relative to ebp. Special care
+ must be taken for the normal return case of a function using
+ eh_return: the eax and edx registers are marked as saved, but not
+ restored along this path. */
+ offset = frame.nregs;
+ if (current_function_calls_eh_return && style != 2)
+ offset -= 2;
+ offset *= -UNITS_PER_WORD;
+
+ /* APPLE LOCAL begin CW asm blocks */
+ /* For an asm function, don't generate an epilogue. */
+ if (cfun->iasm_asm_function)
+ {
+ emit_jump_insn (gen_return_internal ());
+ return;
+ }
+ /* APPLE LOCAL end CW asm blocks */
+
+  /* If we're only restoring one register and sp is not valid, then
+     use a move instruction to restore the register, since it's
+     less work than reloading sp and popping the register.
+
+     The default code results in a stack adjustment using an add/lea
+     instruction, while this code results in a LEAVE instruction (or
+     discrete equivalent), so it is profitable in some other cases as
+     well, especially when there are no registers to restore.  We also
+     use this code when TARGET_USE_LEAVE is set and there is exactly
+     one register to pop.  This heuristic may need some tuning in the
+     future. */
+ if ((!sp_valid && frame.nregs <= 1)
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1 || frame.to_allocate))
+ || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
+ || (frame_pointer_needed && TARGET_USE_LEAVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && frame.nregs == 1)
+ || current_function_calls_eh_return)
+ {
+      /* Restore registers.  We can use ebp or esp to address the memory
+         locations.  If both are available, default to ebp, since offsets
+         are known to be small.  The only exception is esp pointing
+         directly to the end of the block of saved registers, where we
+         may simplify the addressing mode. */
+
+ if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
+ ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
+ else
+ ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
+ offset, style == 2);
+
+ /* eh_return epilogues need %ecx added to the stack pointer. */
+ if (style == 2)
+ {
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
+
+ if (frame_pointer_needed)
+ {
+ tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
+ tmp = plus_constant (tmp, UNITS_PER_WORD);
+ emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
+
+ tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
+ emit_move_insn (hard_frame_pointer_rtx, tmp);
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
+ const0_rtx, style);
+ }
+ else
+ {
+ tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
+ tmp = plus_constant (tmp, (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD));
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+ }
+ }
+ else if (!frame_pointer_needed)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD),
+ style);
+ /* If not an i386, mov & pop is faster than "leave". */
+ else if (TARGET_USE_LEAVE || optimize_size
+ || !cfun->machine->use_fast_prologue_epilogue)
+ emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
+ else
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ const0_rtx, style);
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ else
+ emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ }
+ }
+ else
+ {
+ /* First step is to deallocate the stack frame so that we can
+ pop the registers. */
+ if (!sp_valid)
+ {
+ gcc_assert (frame_pointer_needed);
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ GEN_INT (offset), style);
+ }
+ else if (frame.to_allocate)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate), style);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, false))
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
+ else
+ emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
+ }
+ if (frame_pointer_needed)
+ {
+ /* Leave results in shorter dependency chains on CPUs that are
+ able to grok it fast. */
+ if (TARGET_USE_LEAVE)
+ emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
+ else if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ else
+ emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ }
+ }
+
+ if (cfun->machine->force_align_arg_pointer)
+ {
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ cfun->machine->force_align_arg_pointer,
+ GEN_INT (-4)));
+ }
+
+ /* Sibcall epilogues don't want a return instruction. */
+ if (style == 0)
+ return;
+
+ if (current_function_pops_args && current_function_args_size)
+ {
+ rtx popc = GEN_INT (current_function_pops_args);
+
+      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
+         return address, do an explicit add, and jump indirectly to
+         the caller. */
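+      /* That is, the emitted sequence is roughly:
+	   popl %ecx ; addl $N, %esp ; jmp *%ecx  */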
+
+ if (current_function_pops_args >= 65536)
+ {
+ rtx ecx = gen_rtx_REG (SImode, 2);
+
+ /* There is no "pascal" calling convention in 64bit ABI. */
+ gcc_assert (!TARGET_64BIT);
+
+ emit_insn (gen_popsi1 (ecx));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
+ emit_jump_insn (gen_return_indirect_internal (ecx));
+ }
+ else
+ emit_jump_insn (gen_return_pop_internal (popc));
+ }
+ else
+ emit_jump_insn (gen_return_internal ());
+}
+
+/* Undo modifications made while compiling the function, such as
+   redirecting pic_offset_table_rtx to a substitute hard register. */
+
+static void
+ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (pic_offset_table_rtx)
+ REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
+#if TARGET_MACHO
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
+}
+
+/* Extract the parts of an RTL expression that is a valid memory address
+ for an instruction. Return 0 if the structure of the address is
+   grossly off.  Return -1 if the address contains ASHIFT, so it is not
+   strictly valid, but is still used for computing the length of the
+   lea instruction. */
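+/* For example, the address (plus (plus (mult (reg B) (const_int 4))
+   (reg A)) (const_int 12)) -- i.e. 12(A,B,4) in AT&T terms --
+   decomposes into base = A, index = B, scale = 4, disp = 12.  */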
+
+int
+ix86_decompose_address (rtx addr, struct ix86_address *out)
+{
+ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
+ rtx base_reg, index_reg;
+ HOST_WIDE_INT scale = 1;
+ rtx scale_rtx = NULL_RTX;
+ int retval = 1;
+ enum ix86_address_seg seg = SEG_DEFAULT;
+
+ if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
+ base = addr;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx addends[4], op;
+ int n = 0, i;
+
+ op = addr;
+ do
+ {
+ if (n >= 4)
+ return 0;
+ addends[n++] = XEXP (op, 1);
+ op = XEXP (op, 0);
+ }
+ while (GET_CODE (op) == PLUS);
+ if (n >= 4)
+ return 0;
+ addends[n] = op;
+
+ for (i = n; i >= 0; --i)
+ {
+ op = addends[i];
+ switch (GET_CODE (op))
+ {
+ case MULT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ scale_rtx = XEXP (op, 1);
+ break;
+
+ case UNSPEC:
+ if (XINT (op, 1) == UNSPEC_TP
+ && TARGET_TLS_DIRECT_SEG_REFS
+ && seg == SEG_DEFAULT)
+ seg = TARGET_64BIT ? SEG_FS : SEG_GS;
+ else
+ return 0;
+ break;
+
+ case REG:
+ case SUBREG:
+ if (!base)
+ base = op;
+ else if (!index)
+ index = op;
+ else
+ return 0;
+ break;
+
+ case CONST:
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (disp)
+ return 0;
+ disp = op;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (GET_CODE (addr) == MULT)
+ {
+ index = XEXP (addr, 0); /* index*scale */
+ scale_rtx = XEXP (addr, 1);
+ }
+ else if (GET_CODE (addr) == ASHIFT)
+ {
+ rtx tmp;
+
+ /* We're called for lea too, which implements ashift on occasion. */
+ index = XEXP (addr, 0);
+ tmp = XEXP (addr, 1);
+ if (GET_CODE (tmp) != CONST_INT)
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ retval = -1;
+ }
+ else
+ disp = addr; /* displacement */
+
+ /* Extract the integral value of scale. */
+ if (scale_rtx)
+ {
+ if (GET_CODE (scale_rtx) != CONST_INT)
+ return 0;
+ scale = INTVAL (scale_rtx);
+ }
+
+ base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
+ index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
+
+  /* Allow the arg pointer and stack pointer as an index if there is
+     no scaling. */
+ if (base_reg && index_reg && scale == 1
+ && (index_reg == arg_pointer_rtx
+ || index_reg == frame_pointer_rtx
+ || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
+ {
+ rtx tmp;
+ tmp = base, base = index, index = tmp;
+ tmp = base_reg, base_reg = index_reg, index_reg = tmp;
+ }
+
+ /* Special case: %ebp cannot be encoded as a base without a displacement. */
+ if ((base_reg == hard_frame_pointer_rtx
+ || base_reg == frame_pointer_rtx
+ || base_reg == arg_pointer_rtx) && !disp)
+ disp = const0_rtx;
+
+  /* Special case: on K6, [%esi] causes the instruction to be vector
+     decoded.  Avoid this by transforming it to [%esi+0]. */
+ if (ix86_tune == PROCESSOR_K6 && !optimize_size
+ && base_reg && !index_reg && !disp
+ && REG_P (base_reg)
+ && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
+ disp = const0_rtx;
+
+ /* Special case: encode reg+reg instead of reg*2. */
+ if (!base && index && scale && scale == 2)
+ base = index, base_reg = index_reg, scale = 1;
+
+ /* Special case: scaling cannot be encoded without base or displacement. */
+ if (!base && !disp && index && scale != 1)
+ disp = const0_rtx;
+
+ out->base = base;
+ out->index = index;
+ out->disp = disp;
+ out->scale = scale;
+ out->seg = seg;
+
+ return retval;
+}
+
+/* Return the cost of the memory address X.
+   For i386, it is better to use a complex address than to let gcc copy
+   the address into a register and make a new pseudo, but not if the
+   address requires two registers - that would mean more pseudos with
+   longer lifetimes. */
+static int
+ix86_address_cost (rtx x)
+{
+ struct ix86_address parts;
+ int cost = 1;
+ int ok = ix86_decompose_address (x, &parts);
+
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ /* More complex memory references are better. */
+ if (parts.disp && parts.disp != const0_rtx)
+ cost--;
+ if (parts.seg != SEG_DEFAULT)
+ cost--;
+
+ /* Attempt to minimize number of registers in the address. */
+ if ((parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
+ || (parts.index
+ && (!REG_P (parts.index)
+ || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
+ cost++;
+
+ if (parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
+ && parts.index
+ && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
+ && parts.base != parts.index)
+ cost++;
+
+  /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
+     00_xxx_100b, since its predecode logic can't detect the length of
+     such instructions and decoding degenerates to the vector decoder.
+     Increase the cost of such addresses here.  The penalty is at least
+     2 cycles.  It may be worthwhile to split such addresses or even to
+     refuse them entirely.
+
+     The following addressing modes are affected:
+      [base+scale*index]
+      [scale*index+disp]
+      [base+index]
+
+     The first and last cases may be avoidable by explicitly coding the
+     zero into the memory address, but I don't have an AMD-K6 machine
+     handy to check this theory. */
+
+ if (TARGET_K6
+ && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
+ || (parts.disp && !parts.base && parts.index && parts.scale != 1)
+ || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
+ cost += 10;
+
+ return cost;
+}
+
+/* If X is a machine specific address (i.e. a symbol or label being
+ referenced as a displacement from the GOT implemented using an
+ UNSPEC), then return the base term. Otherwise return X. */
+
+rtx
+ix86_find_base_term (rtx x)
+{
+ rtx term;
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST)
+ return x;
+ term = XEXP (x, 0);
+ if (GET_CODE (term) == PLUS
+ && (GET_CODE (XEXP (term, 1)) == CONST_INT
+ || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
+ term = XEXP (term, 0);
+ if (GET_CODE (term) != UNSPEC
+ || XINT (term, 1) != UNSPEC_GOTPCREL)
+ return x;
+
+ term = XVECEXP (term, 0, 0);
+
+ if (GET_CODE (term) != SYMBOL_REF
+ && GET_CODE (term) != LABEL_REF)
+ return x;
+
+ return term;
+ }
+
+ term = ix86_delegitimize_address (x);
+
+ if (GET_CODE (term) != SYMBOL_REF
+ && GET_CODE (term) != LABEL_REF)
+ return x;
+
+ return term;
+}
+
+/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
+   this is used to form addresses to local data when -fPIC is in
+   use. */
+
+static bool
+darwin_local_data_pic (rtx disp)
+{
+ if (GET_CODE (disp) == MINUS)
+ {
+ if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
+ || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
+ if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
+ {
+ const char *sym_name = XSTR (XEXP (disp, 1), 0);
+ if (! strcmp (sym_name, "<pic base>"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Determine if a given RTX is a valid constant. We already know this
+ satisfies CONSTANT_P. */
+
+bool
+legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return false;
+ x = XEXP (x, 0);
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (x))
+ return true;
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ case UNSPEC_NTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ default:
+ return false;
+ }
+
+ /* We must have drilled down to a symbol. */
+ if (GET_CODE (x) == LABEL_REF)
+ return true;
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS symbols are never valid. */
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return false;
+ /* APPLE LOCAL begin dynamic-no-pic */
+#if TARGET_MACHO
+ if (TARGET_MACHO && MACHO_DYNAMIC_NO_PIC_P)
+ return machopic_symbol_defined_p (x);
+#endif
+ break;
+
+ case PLUS:
+ {
+ rtx left = XEXP (x, 0);
+ rtx right = XEXP (x, 1);
+ bool left_is_constant = legitimate_constant_p (left);
+ bool right_is_constant = legitimate_constant_p (right);
+ return left_is_constant && right_is_constant;
+ }
+ break;
+ /* APPLE LOCAL end dynamic-no-pic */
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == TImode
+ && x != CONST0_RTX (TImode)
+ && !TARGET_64BIT)
+ return false;
+ break;
+
+ case CONST_VECTOR:
+ /* APPLE LOCAL begin radar 4874197 mainline candidate */
+ if (standard_sse_constant_p (x))
+ /* APPLE LOCAL end radar 4874197 mainline candidate */
+ return true;
+ return false;
+
+ default:
+ break;
+ }
+
+ /* Otherwise we handle everything else in the move patterns. */
+ return true;
+}
+
+/* Determine if it's legal to put X into the constant pool. This
+ is not possible for the address of thread-local symbols, which
+ is checked above. */
+
+static bool
+ix86_cannot_force_const_mem (rtx x)
+{
+ /* We can always put integral constants and vectors in memory. */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ return false;
+
+ default:
+ break;
+ }
+ return !legitimate_constant_p (x);
+}
+
+/* Determine if a given RTX is a valid constant address. */
+
+bool
+constant_address_p (rtx x)
+{
+ return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
+}
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+bool
+legitimate_pic_operand_p (rtx x)
+{
+ rtx inner;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ inner = XEXP (x, 0);
+ if (GET_CODE (inner) == PLUS
+ && GET_CODE (XEXP (inner, 1)) == CONST_INT)
+ inner = XEXP (inner, 0);
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (inner) == UNSPEC)
+ switch (XINT (inner, 1))
+ {
+ case UNSPEC_GOTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ x = XVECEXP (inner, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ default:
+ return false;
+ }
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return legitimate_pic_address_disp_p (x);
+
+ default:
+ return true;
+ }
+}
+
+/* Determine if a given CONST RTX is a valid memory displacement
+ in PIC mode. */
+
+int
+legitimate_pic_address_disp_p (rtx disp)
+{
+ bool saw_plus;
+
+ /* In 64bit mode we can allow direct addresses of symbols and labels
+ when they are not dynamic symbols. */
+ if (TARGET_64BIT)
+ {
+ rtx op0 = disp, op1;
+
+ switch (GET_CODE (disp))
+ {
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ if (GET_CODE (XEXP (disp, 0)) != PLUS)
+ break;
+ op0 = XEXP (XEXP (disp, 0), 0);
+ op1 = XEXP (XEXP (disp, 0), 1);
+ if (GET_CODE (op1) != CONST_INT
+ || INTVAL (op1) >= 16*1024*1024
+ || INTVAL (op1) < -16*1024*1024)
+ break;
+ if (GET_CODE (op0) == LABEL_REF)
+ return true;
+ if (GET_CODE (op0) != SYMBOL_REF)
+ break;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS references should always be enclosed in UNSPEC. */
+ if (SYMBOL_REF_TLS_MODEL (op0))
+ return false;
+ /* APPLE LOCAL begin fix-and-continue 6227434 */
+#if TARGET_MACHO
+ if (machopic_data_defined_p (op0))
+ return true;
+
+ /* Under -mfix-and-continue, even local storage is
+ addressed via the GOT, so that the value of local
+ statics is preserved when a function is "fixed." */
+ if (indirect_data (op0))
+ return false;
+#endif
+ /* APPLE LOCAL end fix-and-continue 6227434 */
+ if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
+ return true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (GET_CODE (disp) != CONST)
+ return 0;
+ disp = XEXP (disp, 0);
+
+ if (TARGET_64BIT)
+ {
+      /* It is unsafe to allow PLUS expressions here; that would break
+         the limit on the allowed distance of GOT references.  We
+         should not need these anyway. */
+ if (GET_CODE (disp) != UNSPEC
+ || (XINT (disp, 1) != UNSPEC_GOTPCREL
+ && XINT (disp, 1) != UNSPEC_GOTOFF))
+ return 0;
+
+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
+ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
+ return 0;
+ return 1;
+ }
+
+ saw_plus = false;
+ if (GET_CODE (disp) == PLUS)
+ {
+ if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
+ return 0;
+ disp = XEXP (disp, 0);
+ saw_plus = true;
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (disp))
+ return 1;
+
+ if (GET_CODE (disp) != UNSPEC)
+ return 0;
+
+ switch (XINT (disp, 1))
+ {
+ case UNSPEC_GOT:
+ if (saw_plus)
+ return false;
+ return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
+ case UNSPEC_GOTOFF:
+      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
+         While the ABI also specifies a 32bit relocation, we don't
+         produce it in the small PIC model at all. */
+ if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
+ && !TARGET_64BIT)
+ return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
+ return false;
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ if (saw_plus)
+ return false;
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
+ case UNSPEC_NTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ }
+
+ return 0;
+}
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
+ memory address for an instruction. The MODE argument is the machine mode
+ for the MEM expression that wants to use this address.
+
+   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
+ convert common non-canonical forms to canonical form so that they will
+ be recognized. */
+
+int
+legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+ const char *reason = NULL;
+ rtx reason_rtx = NULL_RTX;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
+ GET_MODE_NAME (mode), strict);
+ debug_rtx (addr);
+ }
+
+ if (ix86_decompose_address (addr, &parts) <= 0)
+ {
+ reason = "decomposition failed";
+ goto report_error;
+ }
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ /* Validate base register.
+
+ Don't allow SUBREG's that span more than a word here. It can lead to spill
+ failures when the base is one word out of a two word structure, which is
+ represented internally as a DImode int. */
+
+ if (base)
+ {
+ rtx reg;
+ reason_rtx = base;
+
+ if (REG_P (base))
+ reg = base;
+ else if (GET_CODE (base) == SUBREG
+ && REG_P (SUBREG_REG (base))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (base);
+ else
+ {
+ reason = "base is not a register";
+ goto report_error;
+ }
+
+ if (GET_MODE (base) != Pmode)
+ {
+ reason = "base is not in Pmode";
+ goto report_error;
+ }
+
+ if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
+ {
+ reason = "base is not valid";
+ goto report_error;
+ }
+ }
+
+ /* Validate index register.
+
+ Don't allow SUBREG's that span more than a word here -- same as above. */
+
+ if (index)
+ {
+ rtx reg;
+ reason_rtx = index;
+
+ if (REG_P (index))
+ reg = index;
+ else if (GET_CODE (index) == SUBREG
+ && REG_P (SUBREG_REG (index))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (index);
+ else
+ {
+ reason = "index is not a register";
+ goto report_error;
+ }
+
+ if (GET_MODE (index) != Pmode)
+ {
+ reason = "index is not in Pmode";
+ goto report_error;
+ }
+
+ if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
+ {
+ reason = "index is not valid";
+ goto report_error;
+ }
+ }
+
+ /* Validate scale factor. */
+ if (scale != 1)
+ {
+ reason_rtx = GEN_INT (scale);
+ if (!index)
+ {
+ reason = "scale without index";
+ goto report_error;
+ }
+
+ if (scale != 2 && scale != 4 && scale != 8)
+ {
+ reason = "scale is not a valid multiplier";
+ goto report_error;
+ }
+ }
+
+ /* Validate displacement. */
+ if (disp)
+ {
+ reason_rtx = disp;
+
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC)
+ switch (XINT (XEXP (disp, 0), 1))
+ {
+	    /* Refuse GOTOFF and GOT in 64bit mode since they are always
+	       64bit when used.  While the ABI also specifies 32bit
+	       relocations, we don't produce them at all and use
+	       IP-relative addressing instead. */
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ gcc_assert (flag_pic);
+ if (!TARGET_64BIT)
+ goto is_legitimate_pic;
+ reason = "64bit address unspec";
+ goto report_error;
+
+ case UNSPEC_GOTPCREL:
+ gcc_assert (flag_pic);
+ goto is_legitimate_pic;
+
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ case UNSPEC_NTPOFF:
+ case UNSPEC_DTPOFF:
+ break;
+
+ default:
+ reason = "invalid address unspec";
+ goto report_error;
+ }
+
+ else if (SYMBOLIC_CONST (disp)
+ && (flag_pic
+ || (TARGET_MACHO
+#if TARGET_MACHO
+ && MACHOPIC_INDIRECT
+ && !machopic_operand_p (disp)
+#endif
+ )))
+ {
+
+ is_legitimate_pic:
+ if (TARGET_64BIT && (index || base))
+ {
+ /* foo@dtpoff(%rX) is ok. */
+ if (GET_CODE (disp) != CONST
+ || GET_CODE (XEXP (disp, 0)) != PLUS
+ || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
+ || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
+ || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ {
+ reason = "non-constant pic memory reference";
+ goto report_error;
+ }
+ }
+ /* APPLE LOCAL begin dynamic-no-pic */
+ else if (flag_pic && ! legitimate_pic_address_disp_p (disp))
+ {
+ reason = "displacement is an invalid pic construct";
+ goto report_error;
+ }
+#if TARGET_MACHO
+ else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
+ {
+	      reason = "displacement must be referenced via non_lazy_pointer";
+ goto report_error;
+ }
+#endif
+ /* APPLE LOCAL end dynamic-no-pic */
+
+ /* This code used to verify that a symbolic pic displacement
+ includes the pic_offset_table_rtx register.
+
+	     While this is a good idea, unfortunately these constructs may
+	     be created by the "adds using lea" optimization for incorrect
+	     code like:
+
+ int a;
+ int foo(int i)
+ {
+ return *(&a+i);
+ }
+
+	     This code is nonsensical, but results in addressing the
+	     GOT table with a pic_offset_table_rtx base.  We can't
+	     just refuse it easily, since it gets matched by the
+	     "addsi3" pattern, which later gets split to lea in the
+	     case where the output register differs from the input.
+	     While this could be handled by a separate addsi pattern
+	     for this case that never results in lea, disabling this
+	     test seems to be the easier and correct fix for the
+	     crash. */
+ }
+ else if (GET_CODE (disp) != LABEL_REF
+ && GET_CODE (disp) != CONST_INT
+ && (GET_CODE (disp) != CONST
+ || !legitimate_constant_p (disp))
+ && (GET_CODE (disp) != SYMBOL_REF
+ || !legitimate_constant_p (disp)))
+ {
+ reason = "displacement is not constant";
+ goto report_error;
+ }
+ else if (TARGET_64BIT
+ && !x86_64_immediate_operand (disp, VOIDmode))
+ {
+ reason = "displacement is out of range";
+ goto report_error;
+ }
+ }
+
+ /* Everything looks valid. */
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr, "Success.\n");
+ return TRUE;
+
+ report_error:
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "Error: %s\n", reason);
+ debug_rtx (reason_rtx);
+ }
+ return FALSE;
+}
+
+/* Return a unique alias set for the GOT. */
+
+static HOST_WIDE_INT
+ix86_GOT_alias_set (void)
+{
+ static HOST_WIDE_INT set = -1;
+ if (set == -1)
+ set = new_alias_set ();
+ return set;
+}
+
+/* Return a legitimate reference for ORIG (an address) using the
+ register REG. If REG is 0, a new pseudo is generated.
+
+ There are two types of references that must be handled:
+
+ 1. Global data references must load the address from the GOT, via
+ the PIC reg. An insn is emitted to do this load, and the reg is
+ returned.
+
+ 2. Static data references, constant pool addresses, and code labels
+ compute the address as an offset from the GOT, whose base is in
+ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
+ differentiate them from global data objects. The returned
+ address is the PIC reg + an unspec constant.
+
+ GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
+ reg also appears in the address. */
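+/* A sketch of the two cases on ia32: for a global "foo" this emits
+   reg = [pic_reg + foo@GOT] and returns reg; for a local "bar" it
+   returns the expression pic_reg + bar@GOTOFF without any load.  */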
+
+static rtx
+legitimize_pic_address (rtx orig, rtx reg)
+{
+ rtx addr = orig;
+ rtx new = orig;
+ rtx base;
+
+#if TARGET_MACHO
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ /* Use the generic Mach-O PIC machinery. */
+ return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
+ }
+#endif
+
+ if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
+ new = addr;
+ else if (TARGET_64BIT
+ && ix86_cmodel != CM_SMALL_PIC
+ && local_symbolic_operand (addr, Pmode))
+ {
+ rtx tmpreg;
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
+ }
+ else
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new = gen_rtx_CONST (Pmode, new);
+ if (!reg)
+ tmpreg = gen_reg_rtx (Pmode);
+ else
+ tmpreg = reg;
+ emit_move_insn (tmpreg, new);
+
+ if (reg != 0)
+ {
+ new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
+ tmpreg, 1, OPTAB_DIRECT);
+ new = reg;
+ }
+ else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
+ }
+ else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
+ {
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
+ }
+ else
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
+ {
+ if (TARGET_64BIT)
+ {
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_const_mem (Pmode, new);
+ set_mem_alias_set (new, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+	  /* Use gen_movsi directly; otherwise the address is loaded
+	     into a register for CSE.  We don't want to CSE these
+	     addresses; instead we CSE addresses from the GOT table,
+	     so skip this. */
+ emit_insn (gen_movsi (reg, new));
+ new = reg;
+ }
+ else
+ {
+ /* This symbol must be referenced via a load from the
+ Global Offset Table (@GOT). */
+
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+ new = gen_const_mem (Pmode, new);
+ set_mem_alias_set (new, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else
+ {
+ if (GET_CODE (addr) == CONST_INT
+ && !x86_64_immediate_operand (addr, VOIDmode))
+ {
+ if (reg)
+ {
+ emit_move_insn (reg, addr);
+ new = reg;
+ }
+ else
+ new = force_reg (Pmode, addr);
+ }
+ else if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+
+	  /* We must match stuff we generated before.  Assume the only
+	     unspecs that can get here are ours.  Not that we could do
+	     anything with them anyway.... */
+ if (GET_CODE (addr) == UNSPEC
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == UNSPEC))
+ return orig;
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ /* Check first to see if this is a constant offset from a @GOTOFF
+ symbol reference. */
+ if (local_symbolic_operand (op0, Pmode)
+ && GET_CODE (op1) == CONST_INT)
+ {
+ if (!TARGET_64BIT)
+ {
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
+ UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, op1);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else
+ {
+ if (INTVAL (op1) < -16*1024*1024
+ || INTVAL (op1) >= 16*1024*1024)
+ {
+ if (!x86_64_immediate_operand (op1, Pmode))
+ op1 = force_reg (Pmode, op1);
+ new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
+ }
+ }
+ }
+ else
+ {
+ base = legitimize_pic_address (XEXP (addr, 0), reg);
+ new = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg);
+
+ if (GET_CODE (new) == CONST_INT)
+ new = plus_constant (base, INTVAL (new));
+ else
+ {
+ if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
+ new = XEXP (new, 1);
+ }
+ new = gen_rtx_PLUS (Pmode, base, new);
+ /* APPLE LOCAL begin fix-and-continue 6358507 */
+ if (!legitimate_address_p (Pmode, new, FALSE))
+ new = force_reg (Pmode, new);
+ /* APPLE LOCAL end fix-and-continue 6358507 */
+ }
+ }
+ }
+ }
+ return new;
+}
+
+/* Load the thread pointer. If TO_REG is true, force it into a register. */
+
+static rtx
+get_thread_pointer (int to_reg)
+{
+ rtx tp, reg, insn;
+
+ tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+ if (!to_reg)
+ return tp;
+
+ reg = gen_reg_rtx (Pmode);
+ insn = gen_rtx_SET (VOIDmode, reg, tp);
+ insn = emit_insn (insn);
+
+ return reg;
+}
+
+/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
+ false if we expect this to be used for a memory address and true if
+ we expect to load the address into a register. */
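+
+/* Illustrative note (assuming GNU/Linux conventions; not emitted
+   verbatim here): on 32-bit targets the thread pointer is read from
+   %gs:0, and a local-exec access to `x' then adds x@NTPOFF to it
+   (GNU TLS) or subtracts x@TPOFF from it (classic TLS), matching the
+   TLS_MODEL_LOCAL_EXEC case below.  */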
+
+static rtx
+legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
+{
+ rtx dest, base, off, pic, tp;
+ int type;
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ dest = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, 0), insns;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_libcall_block (insns, dest, rax, x);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_global_dynamic_64 (dest, x));
+ else
+ emit_insn (gen_tls_global_dynamic_32 (dest, x));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ base = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
+ insns = get_insns ();
+ end_sequence ();
+
+ note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
+ note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
+ emit_libcall_block (insns, base, rax, note);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_local_dynamic_base_64 (base));
+ else
+ emit_insn (gen_tls_local_dynamic_base_32 (base));
+
+ if (TARGET_GNU2_TLS)
+ {
+ rtx x = ix86_tls_module_base ();
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV,
+ gen_rtx_MINUS (Pmode, x, tp));
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ if (TARGET_64BIT)
+ {
+ pic = NULL;
+ type = UNSPEC_GOTNTPOFF;
+ }
+ else if (flag_pic)
+ {
+ if (reload_in_progress)
+ regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+ pic = pic_offset_table_rtx;
+ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+ }
+ else if (!TARGET_ANY_GNU_TLS)
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ type = UNSPEC_GOTTPOFF;
+ }
+ else
+ {
+ pic = NULL;
+ type = UNSPEC_INDNTPOFF;
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
+ off = gen_rtx_CONST (Pmode, off);
+ if (pic)
+ off = gen_rtx_PLUS (Pmode, pic, off);
+ off = gen_const_mem (Pmode, off);
+ set_mem_alias_set (off, ix86_GOT_alias_set ());
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ off = force_reg (Pmode, off);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
+ (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the 80386, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in a general reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in a general reg.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address in i386.c for details. */
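+
+/* For example (illustrative): (plus (ashift (reg) (const_int 2)) (reg))
+   is canonicalized below into (plus (mult (reg) (const_int 4)) (reg)),
+   which matches the hardware's scaled-index addressing mode.  */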
+
+rtx
+legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
+{
+ int changed = 0;
+ unsigned log;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
+ GET_MODE_NAME (mode));
+ debug_rtx (x);
+ }
+
+ log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
+ if (log)
+ return legitimize_tls_address (x, log, false);
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
+ {
+ rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
+ }
+
+ if (flag_pic && SYMBOLIC_CONST (x))
+ return legitimize_pic_address (x, 0);
+ /* APPLE LOCAL begin dynamic-no-pic */
+#if TARGET_MACHO
+ if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
+ return machopic_indirect_data_reference (x, 0);
+#endif
+ /* APPLE LOCAL end dynamic-no-pic */
+
+  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
+ if (GET_CODE (x) == ASHIFT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (x, 1));
+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (x) == PLUS)
+ {
+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
+
+ if (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 0), 1));
+ XEXP (x, 0) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == ASHIFT
+ && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 1), 1));
+ XEXP (x, 1) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
+ GEN_INT (1 << log));
+ }
+
+ /* Put multiply first if it isn't already. */
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ rtx tmp = XEXP (x, 0);
+ XEXP (x, 0) = XEXP (x, 1);
+ XEXP (x, 1) = tmp;
+ changed = 1;
+ }
+
+ /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
+ created by virtual register instantiation, register elimination, and
+ similar optimizations. */
+ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ XEXP (XEXP (x, 1), 0)),
+ XEXP (XEXP (x, 1), 1));
+ }
+
+ /* Canonicalize
+ (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). */
+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ rtx constant;
+ rtx other = NULL_RTX;
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ constant = XEXP (x, 1);
+ other = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ }
+ else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
+ {
+ constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ other = XEXP (x, 1);
+ }
+ else
+ constant = 0;
+
+ if (constant)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
+ XEXP (XEXP (XEXP (x, 0), 1), 0)),
+ plus_constant (other, INTVAL (constant)));
+ }
+ }
+
+ if (changed && legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+ }
+
+ if (changed
+ && GET_CODE (XEXP (x, 1)) == REG
+ && GET_CODE (XEXP (x, 0)) == REG)
+ return x;
+
+ if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
+ {
+ changed = 1;
+ x = legitimize_pic_address (x, 0);
+ }
+
+ if (changed && legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (GET_CODE (XEXP (x, 0)) == REG)
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 1), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 1) = temp;
+ return x;
+ }
+
+ else if (GET_CODE (XEXP (x, 1)) == REG)
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 0), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 0) = temp;
+ return x;
+ }
+ }
+
+ return x;
+}
+
+/* Print an integer constant expression in assembler syntax. Addition
+ and subtraction are the only arithmetic that may appear in these
+ expressions. FILE is the stdio stream to write to, X is the rtx, and
+ CODE is the operand print code from the output string. */
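+
+/* For example (illustrative): (const (unspec [(symbol_ref "x")]
+   UNSPEC_GOTOFF)) is printed as "x@GOTOFF", per the UNSPEC cases
+   below.  */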
+
+static void
+output_pic_addr_const (FILE *file, rtx x, int code)
+{
+ char buf[256];
+
+ switch (GET_CODE (x))
+ {
+ case PC:
+ gcc_assert (flag_pic);
+ putc ('.', file);
+ break;
+
+ case SYMBOL_REF:
+ /* APPLE LOCAL begin axe stubs 5571540 */
+ if (! TARGET_MACHO ||
+#if TARGET_MACHO
+ ! darwin_stubs ||
+#endif
+ TARGET_64BIT)
+ /* APPLE LOCAL end axe stubs 5571540 */
+ output_addr_const (file, x);
+ else
+ {
+ const char *name = XSTR (x, 0);
+
+ /* Mark the decl as referenced so that cgraph will output the function. */
+ if (SYMBOL_REF_DECL (x))
+ mark_decl_referenced (SYMBOL_REF_DECL (x));
+
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT
+ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
+ name = machopic_indirection_name (x, /*stub_p=*/true);
+#endif
+ assemble_name (file, name);
+ }
+ if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
+ fputs ("@PLT", file);
+ break;
+
+ case LABEL_REF:
+ x = XEXP (x, 0);
+ /* FALLTHRU */
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+ assemble_name (asm_out_file, buf);
+ break;
+
+ case CONST_INT:
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case CONST:
+ /* This used to output parentheses around the expression,
+ but that does not work on the 386 (either ATT or BSD assembler). */
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ {
+ /* We can use %d if the number is <32 bits and positive. */
+ if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
+ fprintf (file, "0x%lx%08lx",
+ (unsigned long) CONST_DOUBLE_HIGH (x),
+ (unsigned long) CONST_DOUBLE_LOW (x));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
+ }
+ else
+ /* We can't handle floating point constants;
+ PRINT_OPERAND must handle them. */
+ output_operand_lossage ("floating constant misused");
+ break;
+
+ case PLUS:
+ /* Some assemblers need integer constants to appear first. */
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ {
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ }
+ break;
+
+ case MINUS:
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('-', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
+ break;
+
+ case UNSPEC:
+ gcc_assert (XVECLEN (x, 0) == 1);
+ output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOT:
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_GOTPCREL:
+ fputs ("@GOTPCREL(%rip)", file);
+ break;
+ case UNSPEC_GOTTPOFF:
+ /* FIXME: This might be @TPOFF in Sun ld too. */
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_NTPOFF:
+ if (TARGET_64BIT)
+ fputs ("@TPOFF", file);
+ else
+ fputs ("@NTPOFF", file);
+ break;
+ case UNSPEC_DTPOFF:
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ if (TARGET_64BIT)
+ fputs ("@GOTTPOFF(%rip)", file);
+ else
+ fputs ("@GOTNTPOFF", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ fputs ("@INDNTPOFF", file);
+ break;
+ default:
+ output_operand_lossage ("invalid UNSPEC as operand");
+ break;
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid expression as operand");
+ }
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void
+i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ fputs (ASM_LONG, file);
+ output_addr_const (file, x);
+ fputs ("@DTPOFF", file);
+ switch (size)
+ {
+ case 4:
+ break;
+ case 8:
+ fputs (", 0", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
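+
+/* For example (illustrative, assuming ASM_LONG expands to ".long"):
+   SIZE == 4 emits ".long x@DTPOFF", while SIZE == 8 emits
+   ".long x@DTPOFF, 0".  */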
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize PIC+GOTOFF and turn it back
+ into a direct symbol reference.
+
+ On Darwin, this is necessary to avoid a crash, because Darwin
+ has a different PIC label for each routine but the DWARF debugging
+ information is not associated with any particular routine, so it's
+ necessary to remove references to the PIC label from RTL stored by
+ the DWARF output code. */
+
+static rtx
+ix86_delegitimize_address (rtx orig_x)
+{
+ rtx x = orig_x;
+ /* reg_addend is NULL or a multiple of some register. */
+ rtx reg_addend = NULL_RTX;
+ /* const_addend is NULL or a const_int. */
+ rtx const_addend = NULL_RTX;
+ /* This is the result, or NULL. */
+ rtx result = NULL_RTX;
+
+ if (GET_CODE (x) == MEM)
+ x = XEXP (x, 0);
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST
+ || GET_CODE (XEXP (x, 0)) != UNSPEC
+ || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
+ || GET_CODE (orig_x) != MEM)
+ return orig_x;
+ return XVECEXP (XEXP (x, 0), 0, 0);
+ }
+
+ if (GET_CODE (x) != PLUS
+ || GET_CODE (XEXP (x, 1)) != CONST)
+ return orig_x;
+
+ if (GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ /* %ebx + GOT/GOTOFF */
+ ;
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ /* %ebx + %reg * scale + GOT/GOTOFF */
+ reg_addend = XEXP (x, 0);
+ if (GET_CODE (XEXP (reg_addend, 0)) == REG
+ && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ reg_addend = XEXP (reg_addend, 1);
+ else if (GET_CODE (XEXP (reg_addend, 1)) == REG
+ && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
+ reg_addend = XEXP (reg_addend, 0);
+ else
+ return orig_x;
+ if (GET_CODE (reg_addend) != REG
+ && GET_CODE (reg_addend) != MULT
+ && GET_CODE (reg_addend) != ASHIFT)
+ return orig_x;
+ }
+ else
+ return orig_x;
+
+ x = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ const_addend = XEXP (x, 1);
+ x = XEXP (x, 0);
+ }
+
+ if (GET_CODE (x) == UNSPEC
+ && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
+ || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
+ result = XVECEXP (x, 0, 0);
+
+ if (TARGET_MACHO && darwin_local_data_pic (x)
+ && GET_CODE (orig_x) != MEM)
+ result = XEXP (x, 0);
+
+ if (! result)
+ return orig_x;
+
+ if (const_addend)
+ result = gen_rtx_PLUS (Pmode, result, const_addend);
+ if (reg_addend)
+ result = gen_rtx_PLUS (Pmode, reg_addend, result);
+ return result;
+}
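+
+/* For example (illustrative): (plus (reg ebx) (const (unspec
+   [(symbol_ref "x")] UNSPEC_GOTOFF))) delegitimizes back to
+   (symbol_ref "x") when the original rtx was not a MEM.  */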
+
+static void
+put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
+ int fp, FILE *file)
+{
+ const char *suffix;
+
+ if (mode == CCFPmode || mode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
+ code = ix86_fp_compare_code_to_integer (code);
+ mode = CCmode;
+ }
+ if (reverse)
+ code = reverse_condition (code);
+
+ switch (code)
+ {
+ case EQ:
+ suffix = "e";
+ break;
+ case NE:
+ suffix = "ne";
+ break;
+ case GT:
+ gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
+ suffix = "g";
+ break;
+ case GTU:
+ /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
+ Those same assemblers have the same but opposite lossage on cmov. */
+ gcc_assert (mode == CCmode);
+ suffix = fp ? "nbe" : "a";
+ break;
+ case LT:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "s";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "l";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case LTU:
+ gcc_assert (mode == CCmode);
+ suffix = "b";
+ break;
+ case GE:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "ns";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "ge";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case GEU:
+ /* ??? As above. */
+ gcc_assert (mode == CCmode);
+ suffix = fp ? "nb" : "ae";
+ break;
+ case LE:
+ gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
+ suffix = "le";
+ break;
+ case LEU:
+ gcc_assert (mode == CCmode);
+ suffix = "be";
+ break;
+ case UNORDERED:
+ suffix = fp ? "u" : "p";
+ break;
+ case ORDERED:
+ suffix = fp ? "nu" : "np";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ fputs (suffix, file);
+}
+
+/* Print the name of register X to FILE based on its machine mode and number.
+ If CODE is 'w', pretend the mode is HImode.
+ If CODE is 'b', pretend the mode is QImode.
+ If CODE is 'k', pretend the mode is SImode.
+ If CODE is 'q', pretend the mode is DImode.
+ If CODE is 'h', pretend the reg is the 'high' byte register.
+ If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
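+
+/* For example (illustrative): (reg:SI 0) prints as "%eax" by default,
+   as "%ax" with CODE 'w', and as "%al" with CODE 'b', in AT&T syntax.  */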
+
+void
+print_reg (rtx x, int code, FILE *file)
+{
+ gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
+ && REGNO (x) != FRAME_POINTER_REGNUM
+ && REGNO (x) != FLAGS_REG
+ && REGNO (x) != FPSR_REG);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
+ putc ('%', file);
+
+ if (code == 'w' || MMX_REG_P (x))
+ code = 2;
+ else if (code == 'b')
+ code = 1;
+ else if (code == 'k')
+ code = 4;
+ else if (code == 'q')
+ code = 8;
+ else if (code == 'y')
+ code = 3;
+ else if (code == 'h')
+ code = 0;
+ else
+ code = GET_MODE_SIZE (GET_MODE (x));
+
+  /* Irritatingly, AMD extended registers use a different naming
+     convention from the normal registers.  */
+ if (REX_INT_REG_P (x))
+ {
+ gcc_assert (TARGET_64BIT);
+ switch (code)
+ {
+ case 0:
+ error ("extended registers have no high halves");
+ break;
+ case 1:
+ fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 2:
+ fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 4:
+ fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 8:
+ fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ default:
+ error ("unsupported operand size for extended register");
+ break;
+ }
+ return;
+ }
+ switch (code)
+ {
+ case 3:
+ if (STACK_TOP_P (x))
+ {
+ fputs ("st(0)", file);
+ break;
+ }
+ /* FALLTHRU */
+ case 8:
+ case 4:
+ case 12:
+ if (! ANY_FP_REG_P (x))
+ putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
+ /* FALLTHRU */
+ case 16:
+ case 2:
+ normal:
+ fputs (hi_reg_name[REGNO (x)], file);
+ break;
+ case 1:
+ if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
+ goto normal;
+ fputs (qi_reg_name[REGNO (x)], file);
+ break;
+ case 0:
+ if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
+ goto normal;
+ fputs (qi_high_reg_name[REGNO (x)], file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in some tls_local_dynamic_base
+ pattern. */
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Meaning of CODE:
+ L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
+ C -- print opcode suffix for set/cmov insn.
+ c -- like C, but print reversed condition
+ F,f -- likewise, but for floating-point.
+ O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+ otherwise nothing
+ R -- print the prefix for register names.
+ z -- print the opcode suffix for the size of the current operand.
+ * -- print a star (in certain assembler syntax)
+ A -- print an absolute memory reference.
+ w -- print the operand as if it's a "word" (HImode) even if it isn't.
+   s -- print a shift double count, followed by the assembler's argument
+	delimiter.
+ b -- print the QImode name of the register for the indicated operand.
+ %b0 would print %al if operands[0] is reg 0.
+ w -- likewise, print the HImode name of the register.
+ k -- likewise, print the SImode name of the register.
+ q -- likewise, print the DImode name of the register.
+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+ y -- print "st(0)" instead of "st" as a register.
+ D -- print condition for SSE cmp instruction.
+ P -- if PIC, print an @PLT suffix.
+ X -- don't print any sort of PIC '@' suffix for a symbol.
+ & -- print some in-use local-dynamic symbol name.
+ H -- print a memory address offset by 8; used for sse high-parts
+ */
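+
+/* For example (illustrative): with operands[0] an SImode general
+   register, the template "mov%z0\t{%1, %0|%0, %1}" prints the suffix
+   "l" in AT&T syntax, giving "movl"; Intel syntax omits the suffix.  */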
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ if (code)
+ {
+ switch (code)
+ {
+ case '*':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('*', file);
+ return;
+
+ case '&':
+ assemble_name (file, get_some_local_dynamic_name ());
+ return;
+
+ case 'A':
+ switch (ASSEMBLER_DIALECT)
+ {
+ case ASM_ATT:
+ putc ('*', file);
+ break;
+
+ case ASM_INTEL:
+	    /* Intel syntax.  For absolute addresses, registers should not
+	       be surrounded by brackets.  */
+ if (GET_CODE (x) != REG)
+ {
+ putc ('[', file);
+ PRINT_OPERAND (file, x, 0);
+ putc (']', file);
+ return;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ PRINT_OPERAND (file, x, 0);
+ return;
+
+
+ case 'L':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'W':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('w', file);
+ return;
+
+ case 'B':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('b', file);
+ return;
+
+ case 'Q':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'S':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('s', file);
+ return;
+
+ case 'T':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('t', file);
+ return;
+
+ case 'z':
+ /* 387 opcodes don't get size suffixes if the operands are
+ registers. */
+ if (STACK_REG_P (x))
+ return;
+
+ /* Likewise if using Intel opcodes. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return;
+
+	  /* Derive the opcode's size suffix from the size of the operand.  */
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 2:
+#ifdef HAVE_GAS_FILDS_FISTS
+ putc ('s', file);
+#endif
+ return;
+
+ case 4:
+ if (GET_MODE (x) == SFmode)
+ {
+ putc ('s', file);
+ return;
+ }
+ else
+ putc ('l', file);
+ return;
+
+ case 12:
+ case 16:
+ putc ('t', file);
+ return;
+
+ case 8:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+#ifdef GAS_MNEMONICS
+ putc ('q', file);
+#else
+ putc ('l', file);
+ putc ('l', file);
+#endif
+ }
+ else
+ putc ('l', file);
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ case 'b':
+ case 'w':
+ case 'k':
+ case 'q':
+ case 'h':
+ case 'y':
+ case 'X':
+ case 'P':
+ break;
+
+ case 's':
+ if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
+ {
+ PRINT_OPERAND (file, x, 0);
+ putc (',', file);
+ }
+ return;
+
+ case 'D':
+	  /* A little bit of brain damage here.  The SSE compare
+	     instructions use completely different names for the
+	     comparisons than the fp conditional moves do.  */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ case UNEQ:
+ fputs ("eq", file);
+ break;
+ case LT:
+ case UNLT:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case UNLE:
+ fputs ("le", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ case LTGT:
+ fputs ("neq", file);
+ break;
+ case UNGE:
+ case GE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ case GT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ case 'O':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ switch (GET_MODE (x))
+ {
+ case HImode: putc ('w', file); break;
+ case SImode:
+ case SFmode: putc ('l', file); break;
+ case DImode:
+ case DFmode: putc ('q', file); break;
+ default: gcc_unreachable ();
+ }
+ putc ('.', file);
+ }
+#endif
+ return;
+ case 'C':
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
+ return;
+ case 'F':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
+ return;
+
+ /* Like above, but reverse condition */
+ case 'c':
+ /* Check to see if argument to %c is really a constant
+ and not a condition code which needs to be reversed. */
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
+ return;
+ }
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
+ return;
+ case 'f':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
+ return;
+
+ case 'H':
+ /* It doesn't actually matter what mode we use here, as we're
+ only going to use this for printing. */
+ x = adjust_address_nv (x, DImode, 8);
+ break;
+
+ case '+':
+ {
+ rtx x;
+
+ if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
+ return;
+
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ if (pred_val < REG_BR_PROB_BASE * 45 / 100
+ || pred_val > REG_BR_PROB_BASE * 55 / 100)
+ {
+ int taken = pred_val > REG_BR_PROB_BASE / 2;
+ int cputaken = final_forward_branch_p (current_output_insn) == 0;
+
+ /* Emit hints only in the case default branch prediction
+ heuristics would fail. */
+ if (taken != cputaken)
+ {
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken)
+ fputs ("ds ; ", file);
+ else
+ fputs ("cs ; ", file);
+ }
+ }
+ }
+ return;
+ }
+ default:
+ output_operand_lossage ("invalid operand code '%c'", code);
+ }
+ }
+
+ if (GET_CODE (x) == REG)
+ print_reg (x, code, file);
+
+ else if (GET_CODE (x) == MEM)
+ {
+ /* No `byte ptr' prefix for call instructions. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
+ {
+ const char * size;
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1: size = "BYTE"; break;
+ case 2: size = "WORD"; break;
+ case 4: size = "DWORD"; break;
+ case 8: size = "QWORD"; break;
+ case 12: size = "XWORD"; break;
+ case 16: size = "XMMWORD"; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Check for explicit size override (codes 'b', 'w' and 'k') */
+ if (code == 'b')
+ size = "BYTE";
+ else if (code == 'w')
+ size = "WORD";
+ else if (code == 'k')
+ size = "DWORD";
+
+ fputs (size, file);
+ fputs (" PTR ", file);
+ }
+
+ x = XEXP (x, 0);
+ /* Avoid (%rip) for call operands. */
+ if (CONSTANT_ADDRESS_P (x) && code == 'P'
+ && GET_CODE (x) != CONST_INT)
+ output_addr_const (file, x);
+ else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
+ output_operand_lossage ("invalid constraints for operand");
+ else
+ output_address (x);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ fprintf (file, "0x%08lx", l);
+ }
+
+ /* These float cases don't actually occur as immediate operands. */
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fprintf (file, "%s", dstr);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE
+ && GET_MODE (x) == XFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fprintf (file, "%s", dstr);
+ }
+
+ else
+ {
+ /* We have patterns that allow zero sets of memory, for instance.
+ In 64-bit mode, we should probably support all 8-byte vectors,
+ since we can in fact encode that into an immediate. */
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+ gcc_assert (x == CONST0_RTX (GET_MODE (x)));
+ x = const0_rtx;
+ }
+
+ if (code != 'P')
+ {
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ }
+ else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == LABEL_REF)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ else
+ fputs ("OFFSET FLAT:", file);
+ }
+ }
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ /* APPLE LOCAL begin dynamic-no-pic */
+ else if (flag_pic || (TARGET_MACHO && MACHOPIC_INDIRECT))
+ /* APPLE LOCAL end dynamic-no-pic */
+ output_pic_addr_const (file, x, code);
+ else
+ output_addr_const (file, x);
+ }
+}
+
+/* Print a memory operand whose address is ADDR. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int scale;
+ int ok = ix86_decompose_address (addr, &parts);
+
+ gcc_assert (ok);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ switch (parts.seg)
+ {
+ case SEG_DEFAULT:
+ break;
+ case SEG_FS:
+ case SEG_GS:
+ if (USER_LABEL_PREFIX[0] == 0)
+ putc ('%', file);
+ fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!base && !index)
+ {
+      /* A displacement-only address requires special attention.  */
+
+ if (GET_CODE (disp) == CONST_INT)
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
+ {
+ if (USER_LABEL_PREFIX[0] == 0)
+ putc ('%', file);
+ fputs ("ds:", file);
+ }
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
+ }
+ else if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else
+ output_addr_const (file, disp);
+
+ /* Use one byte shorter RIP relative addressing for 64bit mode. */
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
+ disp = XEXP (XEXP (disp, 0), 0);
+ if (GET_CODE (disp) == LABEL_REF
+ || (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == 0))
+ fputs ("(%rip)", file);
+ }
+ }
+ else
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ if (disp)
+ {
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('(', file);
+ if (base)
+ print_reg (base, 0, file);
+ if (index)
+ {
+ putc (',', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, ",%d", scale);
+ }
+ putc (')', file);
+ }
+ else
+ {
+ rtx offset = NULL_RTX;
+
+ if (disp)
+ {
+ /* Pull out the offset of a symbol; print any symbol itself. */
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
+ {
+ offset = XEXP (XEXP (disp, 0), 1);
+ disp = gen_rtx_CONST (VOIDmode,
+ XEXP (XEXP (disp, 0), 0));
+ }
+
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else if (GET_CODE (disp) == CONST_INT)
+ offset = disp;
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('[', file);
+ if (base)
+ {
+ print_reg (base, 0, file);
+ if (offset)
+ {
+ if (INTVAL (offset) >= 0)
+ putc ('+', file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ }
+ }
+ else if (offset)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ else
+ putc ('0', file);
+
+ if (index)
+ {
+ putc ('+', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, "*%d", scale);
+ }
+ putc (']', file);
+ }
+ }
+}
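+
+/* For example (illustrative): base %eax, index %ebx, scale 4 and
+   displacement 8 print as "8(%eax,%ebx,4)" in AT&T syntax and as
+   "[eax+8+ebx*4]" in Intel syntax.  */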
+
+bool
+output_addr_const_extra (FILE *file, rtx x)
+{
+ rtx op;
+
+ if (GET_CODE (x) != UNSPEC)
+ return false;
+
+ op = XVECEXP (x, 0, 0);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTTPOFF:
+ output_addr_const (file, op);
+ /* FIXME: This might be @TPOFF in Sun ld. */
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ output_addr_const (file, op);
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_NTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs ("@TPOFF", file);
+ else
+ fputs ("@NTPOFF", file);
+ break;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, op);
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs ("@GOTTPOFF(%rip)", file);
+ else
+ fputs ("@GOTNTPOFF", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ output_addr_const (file, op);
+ fputs ("@INDNTPOFF", file);
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/* Split one or more DImode RTL references into pairs of SImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses,
+         but we still have to handle them.  */
+ if (GET_CODE (op) == MEM)
+ {
+ lo_half[num] = adjust_address (op, SImode, 0);
+ hi_half[num] = adjust_address (op, SImode, 4);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 0);
+ hi_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 4);
+ }
+ }
+}
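+
+/* For example (illustrative): (mem:DI (reg:SI 0)) is split into
+   (mem:SI (reg:SI 0)) and (mem:SI (plus (reg:SI 0) (const_int 4))).  */
+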
+/* Split one or more TImode RTL references into pairs of DImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses, but we
+         still have to handle them.  */
+ if (GET_CODE (op) == MEM)
+ {
+ lo_half[num] = adjust_address (op, DImode, 0);
+ hi_half[num] = adjust_address (op, DImode, 8);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
+ hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
+ }
+ }
+}
+
+/* Output code to perform a 387 binary operation in INSN, one of PLUS,
+ MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
+ is the expression of the binary operation. The output may either be
+ emitted here, or returned to the caller, like all output_* functions.
+
+ There is no guarantee that the operands are the same mode, as they
+ might be within FLOAT or FLOAT_EXTEND expressions. */
+
+#ifndef SYSV386_COMPAT
+/* Set to 1 for compatibility with brain-damaged assemblers. No-one
+ wants to fix the assemblers because that causes incompatibility
+ with gcc. No-one wants to fix gcc because that causes
+ incompatibility with assemblers... You can use the option of
+ -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
+#define SYSV386_COMPAT 1
+#endif
+
+const char *
+output_387_binary_op (rtx insn, rtx *operands)
+{
+ static char buf[30];
+ const char *p;
+ const char *ssep;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
+
+#ifdef ENABLE_CHECKING
+ /* Even if we do not want to check the inputs, this documents input
+ constraints. Which helps in understanding the following code. */
+ if (STACK_REG_P (operands[0])
+ && ((REG_P (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
+ || (REG_P (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
+ && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
+ ; /* ok */
+ else
+ gcc_assert (is_sse);
+#endif
+
+ switch (GET_CODE (operands[3]))
+ {
+ case PLUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fiadd";
+ else
+ p = "fadd";
+ ssep = "add";
+ break;
+
+ case MINUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fisub";
+ else
+ p = "fsub";
+ ssep = "sub";
+ break;
+
+ case MULT:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fimul";
+ else
+ p = "fmul";
+ ssep = "mul";
+ break;
+
+ case DIV:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fidiv";
+ else
+ p = "fdiv";
+ ssep = "div";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (is_sse)
+ {
+ strcpy (buf, ssep);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %0|%0, %2}");
+ else
+ strcat (buf, "sd\t{%2, %0|%0, %2}");
+ return buf;
+ }
+ strcpy (buf, p);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case MULT:
+ case PLUS:
+ if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
+ {
+ rtx temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+      /* We know operands[0] == operands[1].  */
+
+ if (GET_CODE (operands[2]) == MEM)
+ {
+ p = "%z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+ if (STACK_TOP_P (operands[0]))
+ /* How is it that we are storing to a dead operand[2]?
+ Well, presumably operands[1] is dead too. We can't
+ store the result to st(0) as st(0) gets popped on this
+ instruction. Instead store to operands[2] (which I
+ think has to be st(1)). st(1) will be popped later.
+ gcc <= 2.8.1 didn't have this check and generated
+ assembly code that the Unixware assembler rejected. */
+ p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+ break;
+
+ case MINUS:
+ case DIV:
+ if (GET_CODE (operands[1]) == MEM)
+ {
+ p = "r%z1\t%1";
+ break;
+ }
+
+ if (GET_CODE (operands[2]) == MEM)
+ {
+ p = "%z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+#if SYSV386_COMPAT
+ /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
+ derived assemblers, confusingly reverse the direction of
+ the operation for fsub{r} and fdiv{r} when the
+ destination register is not st(0). The Intel assembler
+ doesn't have this brain damage. Read !SYSV386_COMPAT to
+ figure out what the hardware really does. */
+ if (STACK_TOP_P (operands[0]))
+ p = "{p\t%0, %2|rp\t%2, %0}";
+ else
+ p = "{rp\t%2, %0|p\t%0, %2}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ /* As above for fmul/fadd, we can't store to st(0). */
+ p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+#endif
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+#if SYSV386_COMPAT
+ if (STACK_TOP_P (operands[0]))
+ p = "{rp\t%0, %1|p\t%1, %0}";
+ else
+ p = "{p\t%1, %0|rp\t%0, %1}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
+ else
+ p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
+#endif
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ {
+ if (STACK_TOP_P (operands[1]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
+ break;
+ }
+ else if (STACK_TOP_P (operands[1]))
+ {
+#if SYSV386_COMPAT
+ p = "{\t%1, %0|r\t%0, %1}";
+#else
+ p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
+#endif
+ }
+ else
+ {
+#if SYSV386_COMPAT
+ p = "{r\t%2, %0|\t%0, %2}";
+#else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+#endif
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ strcat (buf, p);
+ return buf;
+}
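+
+/* For example (illustrative): an SFmode SSE add yields the template
+   "addss\t{%2, %0|%0, %2}" from the code above, while the 387 path
+   composes "fadd" with a form suffix such as "p\t{%2, %0|%0, %2}".  */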
+
+/* Return needed mode for entity in optimize_mode_switching pass. */
+
+int
+ix86_mode_needed (int entity, rtx insn)
+{
+ enum attr_i387_cw mode;
+
+  /* The mode UNINITIALIZED is used to store the control word after a
+     function call or ASM pattern.  The mode ANY specifies that the
+     function has no requirements on the control word and makes no
+     changes in the bits we are interested in.  */
+
+ if (CALL_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && (asm_noperands (PATTERN (insn)) >= 0
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
+ return I387_CW_UNINITIALIZED;
+
+ if (recog_memoized (insn) < 0)
+ return I387_CW_ANY;
+
+ mode = get_attr_i387_cw (insn);
+
+ switch (entity)
+ {
+ case I387_TRUNC:
+ if (mode == I387_CW_TRUNC)
+ return mode;
+ break;
+
+ case I387_FLOOR:
+ if (mode == I387_CW_FLOOR)
+ return mode;
+ break;
+
+ case I387_CEIL:
+ if (mode == I387_CW_CEIL)
+ return mode;
+ break;
+
+ case I387_MASK_PM:
+ if (mode == I387_CW_MASK_PM)
+ return mode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return I387_CW_ANY;
+}
+
+/* Output code to initialize the control word copies used by the
+   trunc?f?i and rounding patterns.  MODE selects which control word
+   contents (truncation, floor, ceiling, or precision mask) to set up
+   in the corresponding stack slot.  */
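+
+/* For reference (illustrative): bits 10-11 of the x87 control word hold
+   the rounding-control field, so the masks used below are 0x0c00 (both
+   bits set: round toward zero), 0x0400 (round down) and 0x0800 (round
+   up); bit 5 (0x0020) is the precision-exception mask set for
+   nearbyint.  */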
+
+void
+emit_i387_cw_initialization (int mode)
+{
+ rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ rtx new_mode;
+
+ int slot;
+
+ rtx reg = gen_reg_rtx (HImode);
+
+ emit_insn (gen_x86_fnstcw_1 (stored_mode));
+ emit_move_insn (reg, stored_mode);
+
+ if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ gcc_assert (slot < MAX_386_STACK_LOCALS);
+
+ new_mode = assign_386_stack_local (HImode, slot);
+ emit_move_insn (new_mode, reg);
+}
+
+/* Output code for INSN to convert a float to a signed int. OPERANDS
+ are the insn operands. The output may be [HSD]Imode and the input
+ operand may be [SDX]Fmode. */
+
+const char *
+output_fix_trunc (rtx insn, rtx *operands, int fisttp)
+{
+ int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+ int dimode_p = GET_MODE (operands[0]) == DImode;
+ int round_mode = get_attr_i387_cw (insn);
+
+ /* Jump through a hoop or two for DImode, since the hardware has no
+ non-popping instruction. We used to do this a different way, but
+ that was somewhat fragile and broke with post-reload splitters. */
+ if ((dimode_p || fisttp) && !stack_top_dies)
+ output_asm_insn ("fld\t%y1", operands);
+
+ gcc_assert (STACK_TOP_P (operands[1]));
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+
+ if (fisttp)
+ output_asm_insn ("fisttp%z0\t%0", operands);
+ else
+ {
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%3", operands);
+ if (stack_top_dies || dimode_p)
+ output_asm_insn ("fistp%z0\t%0", operands);
+ else
+ output_asm_insn ("fist%z0\t%0", operands);
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%2", operands);
+ }
+
+ return "";
+}
+
+/* Output code for x87 ffreep insn. The OPNO argument, which may only
+ have the values zero or one, indicates the ffreep insn's operand
+ from the OPERANDS array. */
+
+static const char *
+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
+{
+ if (TARGET_USE_FFREEP)
+#if HAVE_AS_IX86_FFREEP
+ return opno ? "ffreep\t%y1" : "ffreep\t%y0";
+#else
+ switch (REGNO (operands[opno]))
+ {
+ case FIRST_STACK_REG + 0: return ".word\t0xc0df";
+ case FIRST_STACK_REG + 1: return ".word\t0xc1df";
+ case FIRST_STACK_REG + 2: return ".word\t0xc2df";
+ case FIRST_STACK_REG + 3: return ".word\t0xc3df";
+ case FIRST_STACK_REG + 4: return ".word\t0xc4df";
+ case FIRST_STACK_REG + 5: return ".word\t0xc5df";
+ case FIRST_STACK_REG + 6: return ".word\t0xc6df";
+ case FIRST_STACK_REG + 7: return ".word\t0xc7df";
+ }
+#endif
+
+ return opno ? "fstp\t%y1" : "fstp\t%y0";
+}
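+
+/* Note (illustrative): x86 is little-endian, so ".word 0xc0df" above
+   assembles to the bytes 0xdf 0xc0, the encoding of "ffreep %st(0)";
+   0xc1df through 0xc7df likewise encode st(1) through st(7).  */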
+
+
+/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
+ should be used. UNORDERED_P is true when fucom should be used. */
+
+const char *
+output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
+{
+ int stack_top_dies;
+ rtx cmp_op0, cmp_op1;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
+
+ if (eflags_p)
+ {
+ cmp_op0 = operands[0];
+ cmp_op1 = operands[1];
+ }
+ else
+ {
+ cmp_op0 = operands[1];
+ cmp_op1 = operands[2];
+ }
+
+ if (is_sse)
+ {
+ if (GET_MODE (operands[0]) == SFmode)
+ if (unordered_p)
+ return "ucomiss\t{%1, %0|%0, %1}";
+ else
+ return "comiss\t{%1, %0|%0, %1}";
+ else
+ if (unordered_p)
+ return "ucomisd\t{%1, %0|%0, %1}";
+ else
+ return "comisd\t{%1, %0|%0, %1}";
+ }
+
+ gcc_assert (STACK_TOP_P (cmp_op0));
+
+ stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+
+ if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
+ {
+ if (stack_top_dies)
+ {
+ output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
+ return output_387_ffreep (operands, 1);
+ }
+ else
+ return "ftst\n\tfnstsw\t%0";
+ }
+
+ if (STACK_REG_P (cmp_op1)
+ && stack_top_dies
+ && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
+ && REGNO (cmp_op1) != FIRST_STACK_REG)
+ {
+      /* If the top of the 387 stack dies, and the other operand is
+	 also a stack register that dies, then this must be a
+	 `fcompp' float compare.  */
+
+ if (eflags_p)
+ {
+ /* There is no double popping fcomi variant. Fortunately,
+ eflags is immune from the fstp's cc clobbering. */
+ if (unordered_p)
+ output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
+ else
+ output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
+ return output_387_ffreep (operands, 0);
+ }
+ else
+ {
+ if (unordered_p)
+ return "fucompp\n\tfnstsw\t%0";
+ else
+ return "fcompp\n\tfnstsw\t%0";
+ }
+ }
+ else
+ {
+ /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
+
+ static const char * const alt[16] =
+ {
+ "fcom%z2\t%y2\n\tfnstsw\t%0",
+ "fcomp%z2\t%y2\n\tfnstsw\t%0",
+ "fucom%z2\t%y2\n\tfnstsw\t%0",
+ "fucomp%z2\t%y2\n\tfnstsw\t%0",
+
+ "ficom%z2\t%y2\n\tfnstsw\t%0",
+ "ficomp%z2\t%y2\n\tfnstsw\t%0",
+ NULL,
+ NULL,
+
+ "fcomi\t{%y1, %0|%0, %y1}",
+ "fcomip\t{%y1, %0|%0, %y1}",
+ "fucomi\t{%y1, %0|%0, %y1}",
+ "fucomip\t{%y1, %0|%0, %y1}",
+
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+
+ int mask;
+ const char *ret;
+
+ mask = eflags_p << 3;
+ mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
+ mask |= unordered_p << 1;
+ mask |= stack_top_dies;
+
+ gcc_assert (mask < 16);
+ ret = alt[mask];
+ gcc_assert (ret);
+
+ return ret;
+ }
+}
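+
+/* For example (illustrative): EFLAGS_P set, an FP-mode cmp_op1, an
+   ordered compare and a dying stack top give mask == 9, selecting
+   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */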
+
+void
+ix86_output_addr_vec_elt (FILE *file, int value)
+{
+ const char *directive = ASM_LONG;
+
+#ifdef ASM_QUAD
+ if (TARGET_64BIT)
+ directive = ASM_QUAD;
+#else
+ gcc_assert (!TARGET_64BIT);
+#endif
+
+ fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
+}
+
+void
+ix86_output_addr_diff_elt (FILE *file, int value, int rel)
+{
+ if (TARGET_64BIT)
+ fprintf (file, "%s%s%d-%s%d\n",
+ ASM_LONG, LPREFIX, value, LPREFIX, rel);
+ else if (HAVE_AS_GOTOFF_IN_DATA)
+ fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
+#if TARGET_MACHO
+ else if (TARGET_MACHO)
+ {
+ fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
+ machopic_output_function_base_name (file);
+ fprintf(file, "\n");
+ }
+#endif
+ else
+ asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
+ ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
+}
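+
+/* For example (illustrative, assuming ASM_LONG ".long" and LPREFIX
+   ".L"): the 64-bit branch emits ".long .L3-.L2", and the GOTOFF
+   branch emits ".long .L3@GOTOFF".  */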
+
+/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
+ for the target. */
+
+void
+ix86_expand_clear (rtx dest)
+{
+ rtx tmp;
+
+ /* We play register width games, which are only valid after reload. */
+ gcc_assert (reload_completed);
+
+ /* Avoid HImode and its attendant prefix byte. */
+ if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
+ dest = gen_rtx_REG (SImode, REGNO (dest));
+
+ tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
+
+ /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
+ if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
+ {
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+ }
+
+ emit_insn (tmp);
+}
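+
+/* For example (illustrative): clearing (reg:SI 0) typically assembles
+   to "xorl %eax, %eax" (note the flags clobber added above) rather
+   than "movl $0, %eax", unless TARGET_USE_MOV0 asks for the mov form.  */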
+
+/* X is an unchanging MEM. If it is a constant pool reference, return
+ the constant pool rtx, else NULL. */
+
+rtx
+maybe_get_pool_constant (rtx x)
+{
+ x = ix86_delegitimize_address (XEXP (x, 0));
+
+ if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ return get_pool_constant (x);
+
+ return NULL_RTX;
+}
+
+void
+ix86_expand_move (enum machine_mode mode, rtx operands[])
+{
+ int strict = (reload_in_progress || reload_completed);
+ /* APPLE LOCAL dynamic-no-pic */
+ rtx insn, op0, op1;
+ enum tls_model model;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (GET_CODE (op1) == SYMBOL_REF)
+ {
+ model = SYMBOL_REF_TLS_MODEL (op1);
+ if (model)
+ {
+ op1 = legitimize_tls_address (op1, model, true);
+ op1 = force_operand (op1, op0);
+ if (op1 == op0)
+ return;
+ }
+ }
+ else if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
+ {
+ model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
+ if (model)
+ {
+ rtx addend = XEXP (XEXP (op1, 0), 1);
+ op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
+ op1 = force_operand (op1, NULL);
+ op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
+ op0, 1, OPTAB_DIRECT);
+ if (op1 == op0)
+ return;
+ }
+ }
+
+ /* APPLE LOCAL begin dynamic-no-pic */
+ /* allow macho & macho for x86_64 to coexist */
+ if (((TARGET_MACHO && MACHOPIC_INDIRECT)
+ || flag_pic)
+ && mode == Pmode && symbolic_operand (op1, Pmode))
+ /* APPLE LOCAL end dynamic-no-pic */
+ {
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ /* APPLE LOCAL begin dynamic-no-pic */
+ if (MACHOPIC_INDIRECT)
+ {
+ rtx temp = ((reload_in_progress
+ || ((op0 && GET_CODE (op0) == REG)
+ && mode == Pmode))
+ ? op0 : gen_reg_rtx (Pmode));
+ op1 = machopic_indirect_data_reference (op1, temp);
+ if (MACHOPIC_PURE)
+ op1 = machopic_legitimize_pic_address (op1, mode,
+ temp == op1 ? 0 : temp);
+ }
+ if (op0 != op1 && GET_CODE (op0) != MEM)
+ {
+ insn = gen_rtx_SET (VOIDmode, op0, op1);
+ emit_insn (insn);
+ return;
+ }
+ if (GET_CODE (op0) == MEM)
+ op1 = force_reg (Pmode, op1);
+ else
+ {
+ rtx temp = op0;
+ if (GET_CODE (temp) != REG)
+ temp = gen_reg_rtx (Pmode);
+ temp = legitimize_pic_address (op1, temp);
+ if (temp == op0)
+ return;
+ op1 = temp;
+ }
+ /* APPLE LOCAL end dynamic-no-pic */
+#endif
+ }
+ else
+ {
+ if (GET_CODE (op0) == MEM)
+ op1 = force_reg (Pmode, op1);
+ else
+ op1 = legitimize_address (op1, op1, Pmode);
+ }
+ }
+ else
+ {
+ if (GET_CODE (op0) == MEM
+ && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
+ || !push_operand (op0, mode))
+ && GET_CODE (op1) == MEM)
+ op1 = force_reg (mode, op1);
+
+ if (push_operand (op0, mode)
+ && ! general_no_elim_operand (op1, mode))
+ op1 = copy_to_mode_reg (mode, op1);
+
+      /* Force large constants in 64-bit compilation into a register
+	 so that they get CSEd.  */
+ if (TARGET_64BIT && mode == DImode
+ && immediate_operand (op1, mode)
+ && !x86_64_zext_immediate_operand (op1, VOIDmode)
+ && !register_operand (op0, mode)
+ && optimize && !reload_completed && !reload_in_progress)
+ op1 = copy_to_mode_reg (mode, op1);
+
+ if (FLOAT_MODE_P (mode))
+ {
+ /* If we are loading a floating point constant to a register,
+ force the value to memory now, since we'll get better code
+ out the back end. */
+
+ if (strict)
+ ;
+ else if (GET_CODE (op1) == CONST_DOUBLE)
+ {
+ op1 = validize_mem (force_const_mem (mode, op1));
+ if (!register_operand (op0, mode))
+ {
+ rtx temp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+ emit_move_insn (op0, temp);
+ return;
+ }
+ }
+ }
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+void
+ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
+{
+ rtx op0 = operands[0], op1 = operands[1];
+ /* APPLE LOCAL begin radar 4614623 */
+ cfun->uses_vector = 1;
+ /* APPLE LOCAL end radar 4614623 */
+
+  /* Force constants other than zero into memory.  We do not know how
+     the instructions used to build constants modify the upper 64 bits
+     of the register; once we have that information we may be able
+     to handle some of them more efficiently.  */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (op0, mode)
+ && CONSTANT_P (op1)
+ && standard_sse_constant_p (op1) <= 0)
+ op1 = validize_mem (force_const_mem (mode, op1));
+
+  /* If neither operand is already a register, force op1 into one so
+     we do not emit a memory-to-memory move.  */
+ if (!no_new_pseudos
+ && !register_operand (op0, mode)
+ && !register_operand (op1, mode))
+ {
+ emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+/* Implement the movmisalign patterns for SSE. Non-SSE modes go
+ straight to ix86_expand_vector_move. */
+
+void
+ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
+{
+ rtx op0, op1, m;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (MEM_P (op1))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_size)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ /* ??? If we have typed data, then it would appear that using
+ movdqu is the only way to get unaligned data loaded with
+ integer type. */
+ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ rtx zero;
+
+ /* When SSE registers are split into halves, we can avoid
+ writing to the top half twice. */
+ if (TARGET_SSE_SPLIT_REGS)
+ {
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ zero = op0;
+ }
+ else
+ {
+ /* ??? Not sure about the best option for the Intel chips.
+ The following would seem to satisfy; the register is
+ entirely cleared, breaking the dependency chain. We
+ then store to the upper half, with a dependency depth
+ of one. A rumor has it that Intel recommends two movsd
+ followed by an unpacklpd, but this is unconfirmed. And
+ given that the dependency depth of the unpacklpd would
+ still be one, I'm not sure why this would be better. */
+ zero = CONST0_RTX (V2DFmode);
+ }
+
+ m = adjust_address (op1, DFmode, 0);
+ emit_insn (gen_sse2_loadlpd (op0, zero, m));
+ m = adjust_address (op1, DFmode, 8);
+ emit_insn (gen_sse2_loadhpd (op0, op0, m));
+ }
+ else
+ {
+ if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+ emit_move_insn (op0, CONST0_RTX (mode));
+ else
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+
+ if (mode != V4SFmode)
+ op0 = gen_lowpart (V4SFmode, op0);
+ m = adjust_address (op1, V2SFmode, 0);
+ emit_insn (gen_sse_loadlps (op0, op0, m));
+ m = adjust_address (op1, V2SFmode, 8);
+ emit_insn (gen_sse_loadhps (op0, op0, m));
+ }
+ }
+ else if (MEM_P (op0))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_size)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+      /* ??? Similar to the above, only less clear because of
+	 "typeless stores".  */
+ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ m = adjust_address (op0, DFmode, 0);
+ emit_insn (gen_sse2_storelpd (m, op1));
+ m = adjust_address (op0, DFmode, 8);
+ emit_insn (gen_sse2_storehpd (m, op1));
+ }
+ else
+ {
+ if (mode != V4SFmode)
+ op1 = gen_lowpart (V4SFmode, op1);
+ m = adjust_address (op0, V2SFmode, 0);
+ emit_insn (gen_sse_storelps (m, op1));
+ m = adjust_address (op0, V2SFmode, 8);
+ emit_insn (gen_sse_storehps (m, op1));
+ }
+ }
+ else
+ gcc_unreachable ();
+}
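+
+/* Roughly, the cases above come out as follows on the load side (the
+   store side mirrors it); the mnemonics are the usual expansions of the
+   gen_* helpers used above, noted only as an illustration:
+
+     movups           -- V4SF catch-all, smallest encoding
+     movdqu           -- integer vectors, SSE2
+     movsd + movhpd   -- V2DF split into halves
+     movlps + movhps  -- V4SF split into halves  */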
+
+/* Expand a push in MODE. This is some mode for which we do not support
+ proper push instructions, at least from the registers that we expect
+ the value to live in. */
+
+void
+ix86_expand_push (enum machine_mode mode, rtx x)
+{
+ rtx tmp;
+
+ tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
+ GEN_INT (-GET_MODE_SIZE (mode)),
+ stack_pointer_rtx, 1, OPTAB_DIRECT);
+ if (tmp != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, tmp);
+
+ tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
+ emit_move_insn (tmp, x);
+}
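+
+/* i.e. a minimal model of the two insns just emitted (an illustrative
+   sketch, not literal RTL):
+
+     sp -= GET_MODE_SIZE (mode);   // expand_simple_binop above
+     *(mode *) sp = x;             // the final emit_move_insn  */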
+
+/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
+ destination to use for the operation. If different from the true
+ destination in operands[0], a copy operation will be required. */
+
+rtx
+ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src1, src2, dst;
+
+ dst = operands[0];
+ src1 = operands[1];
+ src2 = operands[2];
+
+  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
+ if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ && (rtx_equal_p (dst, src2)
+ || immediate_operand (src1, mode)))
+ {
+ rtx temp = src1;
+ src1 = src2;
+ src2 = temp;
+ }
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (GET_CODE (dst) == MEM)
+ {
+ if (rtx_equal_p (dst, src1))
+ matching_memory = 1;
+ else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ && rtx_equal_p (dst, src2))
+ matching_memory = 2;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+  /* The source operands cannot both be in memory.  */
+ if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
+ {
+ if (matching_memory != 2)
+ src2 = force_reg (mode, src2);
+ else
+ src1 = force_reg (mode, src1);
+ }
+
+  /* If the operation is not commutative, source 1 cannot be a constant
+     or non-matching memory.  */
+ if ((CONSTANT_P (src1)
+ || (!matching_memory && GET_CODE (src1) == MEM))
+ && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+ src1 = force_reg (mode, src1);
+
+  operands[1] = src1;
+  operands[2] = src2;
+ return dst;
+}
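+
+/* For example, "a = b OP c" with B and C both in memory cannot map onto
+   the two-address x86 form, which allows at most one memory operand and
+   needs the destination to match the first source.  The fixup above in
+   effect rewrites such a case as
+
+     reg = c;           // force_reg
+     a = b;  a OP= reg;
+
+   (a sketch of the constraint being enforced, not literal output).  */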
+
+/* Similarly, but assume that the destination has already been
+ set up properly. */
+
+void
+ix86_fixup_binary_operands_no_copy (enum rtx_code code,
+ enum machine_mode mode, rtx operands[])
+{
+ rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+ gcc_assert (dst == operands[0]);
+}
+
+/* Attempt to expand a binary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which would allow 3 separate
+   memory references (one output, two input) in a single insn.  */
+
+void
+ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx src1, src2, dst, op, clob;
+
+ dst = ix86_fixup_binary_operands (code, mode, operands);
+ src1 = operands[1];
+ src2 = operands[2];
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
+ if (reload_in_progress)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. We can only do this with PLUS. */
+ gcc_assert (code == PLUS);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the binary operator meets the
+ appropriate constraints. */
+
+int
+ix86_binary_operator_ok (enum rtx_code code,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx operands[3])
+{
+  /* The source operands cannot both be in memory.  */
+ if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
+ return 0;
+  /* If the operation is not commutative, source 1 cannot be a constant.  */
+ if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+ return 0;
+ /* If the destination is memory, we must have a matching source operand. */
+ if (GET_CODE (operands[0]) == MEM
+ && ! (rtx_equal_p (operands[0], operands[1])
+ || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ && rtx_equal_p (operands[0], operands[2]))))
+ return 0;
+  /* If the operation is not commutative and source 1 is memory, we must
+     have a matching destination.  */
+ if (GET_CODE (operands[1]) == MEM
+ && GET_RTX_CLASS (code) != RTX_COMM_ARITH
+ && ! rtx_equal_p (operands[0], operands[1]))
+ return 0;
+ return 1;
+}
+
+/* Attempt to expand a unary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which would allow 2 separate
+   memory references (one output, one input) in a single insn.  */
+
+void
+ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src, dst, op, clob;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (MEM_P (dst))
+ {
+ if (rtx_equal_p (dst, src))
+ matching_memory = 1;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+ /* When source operand is memory, destination must match. */
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
+ if (reload_in_progress || code == NOT)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. */
+ gcc_assert (code == NOT);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the unary operator meets the
+ appropriate constraints. */
+
+int
+ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx operands[2] ATTRIBUTE_UNUSED)
+{
+ /* If one of operands is memory, source and destination must match. */
+ if ((GET_CODE (operands[0]) == MEM
+ || GET_CODE (operands[1]) == MEM)
+ && ! rtx_equal_p (operands[0], operands[1]))
+ return FALSE;
+ return TRUE;
+}
+
+/* APPLE LOCAL begin 4176531 4424891 */
+static void
+ix86_expand_vector_move2 (enum machine_mode mode, rtx op0, rtx op1)
+{
+ rtx operands[2];
+ operands[0] = op0;
+ operands[1] = op1;
+ ix86_expand_vector_move (mode, operands);
+}
+
+static rtvec
+gen_2_4_rtvec (int scalars_per_vector, rtx val, enum machine_mode mode)
+{
+ rtvec rval;
+ switch (scalars_per_vector)
+ {
+ case 2: rval = gen_rtvec (2, val, CONST0_RTX (mode));
+ break;
+ case 4: rval = gen_rtvec (4, val, CONST0_RTX (mode),
+ CONST0_RTX (mode), CONST0_RTX (mode));
+ break;
+    default: gcc_unreachable ();
+ }
+ return rval;
+}
+
+/* Convert a DFmode value in an SSE register into an unsigned SImode.
+   When -mfpmath=387, this is done with an x87 st(0)_FP->signed-int-64
+   conversion, ignoring the upper 32 bits of the result.  On
+ x86_64, there is an equivalent SSE %xmm->signed-int-64 conversion.
+ On x86_32, we don't have the instruction, nor the 64-bit
+ destination register it requires. Do the conversion inline in the
+ SSE registers. Requires SSE2. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_uns_DF2SI_sse (rtx operands[])
+{
+ rtx int_zero_as_fp, int_maxval_as_fp, int_two31_as_fp;
+ REAL_VALUE_TYPE rvt_zero, rvt_int_maxval, rvt_int_two31;
+ rtx int_zero_as_xmm, int_maxval_as_xmm;
+ rtx fp_value = operands[1];
+ rtx target = operands[0];
+ rtx large_xmm;
+ rtx large_xmm_v2di;
+ rtx le_op;
+ rtx zero_or_two31_xmm;
+ rtx final_result_rtx;
+ rtx v_rtx;
+ rtx incoming_value;
+
+ cfun->uses_vector = 1;
+
+ real_from_integer (&rvt_zero, DFmode, 0ULL, 0ULL, 1);
+ int_zero_as_fp = const_double_from_real_value (rvt_zero, DFmode);
+
+ real_from_integer (&rvt_int_maxval, DFmode, 0xffffffffULL, 0ULL, 1);
+ int_maxval_as_fp = const_double_from_real_value (rvt_int_maxval, DFmode);
+
+ real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1);
+ int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode);
+
+ incoming_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+ fp_value = gen_reg_rtx (V2DFmode);
+ ix86_expand_vector_move2 (V2DFmode, fp_value,
+ gen_rtx_SUBREG (V2DFmode, incoming_value, 0));
+ large_xmm = gen_reg_rtx (V2DFmode);
+
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
+ ix86_expand_vector_move2 (DFmode, large_xmm, v_rtx);
+ le_op = gen_rtx_fmt_ee (LE, V2DFmode,
+ gen_rtx_SUBREG (V2DFmode, fp_value, 0), large_xmm);
+ /* large_xmm = (fp_value >= 2**31) ? -1 : 0 ; */
+ emit_insn (gen_sse2_vmmaskcmpv2df3 (large_xmm, large_xmm, fp_value, le_op));
+
+ int_maxval_as_xmm = gen_reg_rtx (V2DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_maxval_as_fp, DFmode));
+ ix86_expand_vector_move2 (DFmode, int_maxval_as_xmm, v_rtx);
+
+ emit_insn (gen_sse2_vmsminv2df3 (fp_value, fp_value, int_maxval_as_xmm));
+
+ int_zero_as_xmm = gen_reg_rtx (V2DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_zero_as_fp, DFmode));
+
+ ix86_expand_vector_move2 (DFmode, int_zero_as_xmm, v_rtx);
+
+ emit_insn (gen_sse2_vmsmaxv2df3 (fp_value, fp_value, int_zero_as_xmm));
+
+ zero_or_two31_xmm = gen_reg_rtx (V2DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
+ ix86_expand_vector_move2 (DFmode, zero_or_two31_xmm, v_rtx);
+
+ /* zero_or_two31 = (large_xmm) ? 2**31 : 0; */
+ emit_insn (gen_andv2df3 (zero_or_two31_xmm, zero_or_two31_xmm, large_xmm));
+ /* if (large_xmm) fp_value -= 2**31; */
+ emit_insn (gen_subv2df3 (fp_value, fp_value, zero_or_two31_xmm));
+ /* assert (0 <= fp_value && fp_value < 2**31);
+ int_result = trunc (fp_value); */
+ final_result_rtx = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_cvttpd2dq (final_result_rtx, fp_value));
+
+ large_xmm_v2di = gen_reg_rtx (V2DImode);
+ emit_move_insn (large_xmm_v2di, gen_rtx_SUBREG (V2DImode, large_xmm, 0));
+ emit_insn (gen_ashlv2di3 (large_xmm_v2di, large_xmm_v2di,
+ gen_rtx_CONST_INT (SImode, 31)));
+
+ emit_insn (gen_xorv4si3 (final_result_rtx, final_result_rtx,
+ gen_rtx_SUBREG (V4SImode, large_xmm_v2di, 0)));
+ if (!rtx_equal_p (target, final_result_rtx))
+ emit_insn (gen_sse2_stored (target, final_result_rtx));
+ return "";
+}
+
+/* Convert an SFmode value in an SSE register into an unsigned SImode.
+   When -mfpmath=387, this is done with an x87 st(0)_FP->signed-int-64
+   conversion, subsequently ignoring the upper 32 bits of the
+ result. On x86_64, there is an equivalent SSE %xmm->signed-int-64
+ conversion. On x86_32, we don't have the instruction, nor the
+ 64-bit destination register it requires. Do the conversion inline
+ in the SSE registers. Requires SSE2. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_uns_SF2SI_sse (rtx operands[])
+{
+ rtx int_zero_as_fp, int_two31_as_fp, int_two32_as_fp;
+ REAL_VALUE_TYPE rvt_zero, rvt_int_two31, rvt_int_two32;
+ rtx int_zero_as_xmm;
+ rtx fp_value = operands[1];
+ rtx target = operands[0];
+ rtx large_xmm;
+ rtx two31_xmm, two32_xmm;
+ rtx above_two31_xmm, above_two32_xmm;
+ rtx zero_or_two31_SI_xmm;
+ rtx le_op;
+ rtx zero_or_two31_SF_xmm;
+ rtx int_result_xmm;
+ rtx v_rtx;
+ rtx incoming_value;
+
+ cfun->uses_vector = 1;
+
+ real_from_integer (&rvt_zero, SFmode, 0ULL, 0ULL, 1);
+ int_zero_as_fp = const_double_from_real_value (rvt_zero, SFmode);
+
+ real_from_integer (&rvt_int_two31, SFmode, 0x80000000ULL, 0ULL, 1);
+ int_two31_as_fp = const_double_from_real_value (rvt_int_two31, SFmode);
+
+ real_from_integer (&rvt_int_two32, SFmode, (HOST_WIDE_INT)0x100000000ULL,
+ 0ULL, 1);
+ int_two32_as_fp = const_double_from_real_value (rvt_int_two32, SFmode);
+
+ incoming_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+ fp_value = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (V4SFmode, fp_value,
+ gen_rtx_SUBREG (V4SFmode, incoming_value, 0));
+ large_xmm = gen_reg_rtx (V4SFmode);
+
+ /* fp_value = MAX (fp_value, 0.0); */
+ /* Preclude negative values; truncate at zero. */
+ int_zero_as_xmm = gen_reg_rtx (V4SFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_2_4_rtvec (4, int_zero_as_fp, SFmode));
+ ix86_expand_vector_move2 (SFmode, int_zero_as_xmm, v_rtx);
+ emit_insn (gen_sse_vmsmaxv4sf3 (fp_value, fp_value, int_zero_as_xmm));
+
+  /* two31_xmm = 0x80000000; */
+ two31_xmm = gen_reg_rtx (V4SFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_2_4_rtvec (4, int_two31_as_fp, SFmode));
+ ix86_expand_vector_move2 (SFmode, two31_xmm, v_rtx);
+
+  /* zero_or_two31_SF_xmm = 0x80000000; */
+ zero_or_two31_SF_xmm = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (SFmode, zero_or_two31_SF_xmm, two31_xmm);
+
+ /* above_two31_xmm = (fp_value >= 2**31) ? 0xffff_ffff : 0 ; */
+ above_two31_xmm = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (SFmode, above_two31_xmm, two31_xmm);
+ le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two31_xmm,
+ gen_rtx_SUBREG (V4SFmode, two31_xmm, 0));
+ emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two31_xmm, above_two31_xmm,
+ fp_value, le_op));
+
+ /* two32_xmm = 0x1_0000_0000; */
+ two32_xmm = gen_reg_rtx (V4SFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_2_4_rtvec (4, int_two32_as_fp, SFmode));
+ ix86_expand_vector_move2 (SFmode, two32_xmm, v_rtx);
+
+ /* above_two32_xmm = (fp_value >= 2**32) ? 0xffff_ffff : 0 ; */
+ above_two32_xmm = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_move2 (SFmode, above_two32_xmm, two32_xmm);
+ le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two32_xmm,
+ gen_rtx_SUBREG (V4SFmode, two32_xmm, 0));
+ emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two32_xmm, above_two32_xmm,
+ fp_value, le_op));
+
+ /* zero_or_two31_SF_xmm = (above_two31_xmm) ? 2**31 : 0; */
+ emit_insn (gen_andv4sf3 (zero_or_two31_SF_xmm, zero_or_two31_SF_xmm,
+ above_two31_xmm));
+
+  /* zero_or_two31_SI_xmm = (above_two31_xmm << 31);
+     i.e. 0x8000_0000 in each lane where fp_value >= 2**31, else 0.  */
+  zero_or_two31_SI_xmm = gen_reg_rtx (V4SImode);
+  emit_move_insn (zero_or_two31_SI_xmm,
+		  gen_rtx_SUBREG (V4SImode, above_two31_xmm, 0));
+  emit_insn (gen_ashlv4si3 (zero_or_two31_SI_xmm, zero_or_two31_SI_xmm,
+			    gen_rtx_CONST_INT (SImode, 31)));
+
+ /* if (above_two31_xmm) fp_value -= 2**31; */
+ /* If the input FP value is greater than 2**31, subtract that amount
+ from the FP value before conversion. We'll re-add that amount as
+ an integer after the conversion. */
+ emit_insn (gen_subv4sf3 (fp_value, fp_value, zero_or_two31_SF_xmm));
+
+ /* assert (0.0 <= fp_value && fp_value < 2**31);
+ int_result_xmm = trunc (fp_value); */
+ /* Apply the SSE double -> signed_int32 conversion to our biased,
+ clamped SF value. */
+ int_result_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_cvttps2dq (int_result_xmm, fp_value));
+
+  /* int_result_xmm += zero_or_two31_SI_xmm; */
+  /* Restore the 2**31 bias we may have subtracted earlier.  If the
+     input FP value was between 2**31 and 2**32, this will unbias the
+     result.
+
+     input_fp_value < 2**31: this won't change the value
+     2**31 <= input_fp_value < 2**32:
+	 this will restore the 2**31 bias we subtracted earlier
+     input_fp_value >= 2**32: this insn doesn't matter;
+	 the next insn will clobber this result
+  */
+ emit_insn (gen_addv4si3 (int_result_xmm, int_result_xmm,
+ zero_or_two31_SI_xmm));
+
+ /* int_result_xmm |= above_two32_xmm; */
+ /* If the input value was greater than 2**32, force the integral
+ result to 0xffff_ffff. */
+ emit_insn (gen_iorv4si3 (int_result_xmm, int_result_xmm,
+ gen_rtx_SUBREG (V4SImode, above_two32_xmm, 0)));
+
+ if (!rtx_equal_p (target, int_result_xmm))
+ emit_insn (gen_sse2_stored (target, int_result_xmm));
+ return "";
+}
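+
+/* The corresponding scalar model (illustrative; the invented ftoui
+   saturates values at or above 2**32 to 0xffffffff via the final OR,
+   as the code above does):
+
+     static unsigned int ftoui (float f)
+     {
+       if (f < 0.0f) f = 0.0f;                    // vmsmax with zero
+       int over31 = f >= 0x1.0p31f;               // vmmaskcmp vs 2**31
+       int over32 = f >= 0x1.0p32f;               // vmmaskcmp vs 2**32
+       if (over31) f -= 0x1.0p31f;                // and + sub
+       unsigned int r = (unsigned int) (int) f;   // cvttps2dq
+       if (over31) r += 0x80000000u;              // ashl 31 + addv4si3
+       return over32 ? 0xffffffffu : r;           // iorv4si3 with mask
+     }
+*/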
+
+/* Convert an unsigned DImode value into a DFmode, using only SSE.
+   The 64-bit DImode value is split into two 32-bit parts, which are
+   loaded into SSE %xmm registers; the result is returned in an %xmm
+   register.  Requires
+ SSE2; will use SSE3 if available. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_uns_DI2DF_sse (rtx operands[])
+{
+ REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
+ rtx bias_lo_rtx, bias_hi_rtx;
+ rtx target = operands[0];
+ rtx fp_value = operands[1];
+ rtx fp_value_hi, fp_value_lo;
+ rtx fp_value_hi_xmm, fp_value_lo_xmm;
+ rtx int_xmm;
+ rtx final_result_xmm, result_lo_xmm;
+ rtx biases, exponents;
+ rtvec biases_rtvec, exponents_rtvec;
+
+ cfun->uses_vector = 1;
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+
+ int_xmm = gen_reg_rtx (V4SImode);
+
+ fp_value = force_reg (GET_MODE (operands[1]), operands[1]);
+
+ fp_value_lo = gen_rtx_SUBREG (SImode, fp_value, 0);
+ fp_value_lo_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadld (fp_value_lo_xmm, CONST0_RTX (V4SImode),
+ fp_value_lo));
+
+ fp_value_hi = gen_rtx_SUBREG (SImode, fp_value, 4);
+ fp_value_hi_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadld (fp_value_hi_xmm, CONST0_RTX (V4SImode),
+ fp_value_hi));
+
+ ix86_expand_vector_move2 (V4SImode, int_xmm, fp_value_hi_xmm);
+ emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, fp_value_lo_xmm));
+
+ exponents_rtvec = gen_rtvec (4, GEN_INT (0x45300000UL),
+ GEN_INT (0x43300000UL),
+ CONST0_RTX (SImode), CONST0_RTX (SImode));
+ exponents = validize_mem (
+ force_const_mem (V4SImode, gen_rtx_CONST_VECTOR (V4SImode,
+ exponents_rtvec)));
+ emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
+
+ final_result_xmm = gen_reg_rtx (V2DFmode);
+ ix86_expand_vector_move2 (V2DFmode, final_result_xmm,
+ gen_rtx_SUBREG (V2DFmode, int_xmm, 0));
+
+ /* Integral versions of the DFmode 'exponents' above. */
+ REAL_VALUE_FROM_INT (bias_lo_rvt, 0x00000000000000ULL, 0x100000ULL, DFmode);
+ REAL_VALUE_FROM_INT (bias_hi_rvt, 0x10000000000000ULL, 0x000000ULL, DFmode);
+ bias_lo_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_lo_rvt, DFmode);
+ bias_hi_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_hi_rvt, DFmode);
+ biases_rtvec = gen_rtvec (2, bias_lo_rtx, bias_hi_rtx);
+ biases = validize_mem (force_const_mem (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ biases_rtvec)));
+ emit_insn (gen_subv2df3 (final_result_xmm, final_result_xmm, biases));
+
+ if (TARGET_SSE3)
+ {
+ emit_insn (gen_sse3_haddv2df3 (final_result_xmm, final_result_xmm,
+ final_result_xmm));
+ }
+ else
+ {
+ result_lo_xmm = gen_reg_rtx (V2DFmode);
+ ix86_expand_vector_move2 (V2DFmode, result_lo_xmm, final_result_xmm);
+ emit_insn (gen_sse2_unpckhpd (final_result_xmm, final_result_xmm,
+ final_result_xmm));
+ emit_insn (gen_addv2df3 (final_result_xmm, final_result_xmm,
+ result_lo_xmm));
+ }
+
+ if (!rtx_equal_p (target, final_result_xmm))
+ emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
+
+ return "";
+}
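+
+/* A scalar model of the magic-constant trick above (illustrative; the
+   0x453... and 0x433... high words make doubles equal to 2**84 + hi*2**32
+   and 2**52 + lo, so subtracting the two biases and summing leaves
+   hi*2**32 + lo; needs <string.h>):
+
+     static double ulltod (unsigned long long x)
+     {
+       unsigned long long hb = 0x4530000000000000ULL | (x >> 32);
+       unsigned long long lb = 0x4330000000000000ULL | (x & 0xffffffffu);
+       double hi, lo;
+       memcpy (&hi, &hb, 8);                      // punpckldq with the
+       memcpy (&lo, &lb, 8);                      //   'exponents' vector
+       return (hi - 0x1.0p84) + (lo - 0x1.0p52);  // subv2df, then hadd/add
+     }
+*/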
+/* APPLE LOCAL end 4176531 4424891 */
+
+/* APPLE LOCAL begin 4424891 */
+/* Convert an unsigned SImode value into a DFmode, using only SSE.
+ Result returned in an %xmm register. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_uns_SI2DF_sse (rtx operands[])
+{
+ REAL_VALUE_TYPE rvt_int_two31;
+ rtx int_value_reg;
+ rtx fp_value_xmm, fp_value_as_int_xmm;
+ rtx final_result_xmm;
+ rtx int_two31_as_fp, int_two31_as_fp_vec;
+ rtx v_rtx;
+ rtx target = operands[0];
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+ gcc_assert (GET_MODE (operands[1]) == SImode);
+
+ cfun->uses_vector = 1;
+
+ int_value_reg = gen_reg_rtx (SImode);
+ emit_move_insn (int_value_reg, operands[1]);
+ emit_insn (gen_addsi3 (int_value_reg, int_value_reg,
+ GEN_INT (-2147483648LL /* MIN_INT */)));
+
+ fp_value_as_int_xmm = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadld (fp_value_as_int_xmm, CONST0_RTX (V4SImode),
+ int_value_reg));
+
+ fp_value_xmm = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_sse2_cvtdq2pd (fp_value_xmm,
+ gen_rtx_SUBREG (V4SImode,
+ fp_value_as_int_xmm, 0)));
+
+ real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1);
+ int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode);
+ v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
+
+ int_two31_as_fp_vec = validize_mem (force_const_mem (V2DFmode, v_rtx));
+
+ final_result_xmm = gen_reg_rtx (V2DFmode);
+ emit_move_insn (final_result_xmm, fp_value_xmm);
+ emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm,
+ int_two31_as_fp_vec));
+
+ if (!rtx_equal_p (target, final_result_xmm))
+ emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
+
+ return "";
+}
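+
+/* Scalar model of the bias trick above (illustrative; utod is an
+   invented name):
+
+     static double utod (unsigned int x)
+     {
+       int biased = (int) (x + 0x80000000u);      // addsi3 with INT_MIN
+       return (double) biased + 0x1.0p31;         // cvtdq2pd, then vmadd
+     }
+*/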
+
+/* Convert a signed DImode value into a DFmode, using only SSE.
+ Result returned in an %xmm register. For x86_32, -mfpmath=sse,
+ !optimize_size only. */
+const char *
+ix86_expand_convert_sign_DI2DF_sse (rtx operands[])
+{
+ rtx my_operands[2];
+ REAL_VALUE_TYPE rvt_int_two32;
+ rtx rvt_int_two32_vec;
+ rtx fp_value_hi_xmm, fp_value_hi_shifted_xmm;
+ rtx final_result_xmm;
+ rtx int_two32_as_fp, int_two32_as_fp_vec;
+ rtx target = operands[0];
+ rtx input = force_reg (DImode, operands[1]);
+
+ gcc_assert (ix86_preferred_stack_boundary >= 128);
+ gcc_assert (GET_MODE (input) == DImode);
+
+ cfun->uses_vector = 1;
+
+ fp_value_hi_xmm = gen_reg_rtx (V2DFmode);
+ emit_insn (gen_sse2_cvtsi2sd (fp_value_hi_xmm, fp_value_hi_xmm,
+ gen_rtx_SUBREG (SImode, input, 4)));
+
+ real_from_integer (&rvt_int_two32, DFmode, 0x100000000ULL, 0ULL, 1);
+ int_two32_as_fp = const_double_from_real_value (rvt_int_two32, DFmode);
+ rvt_int_two32_vec = gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_2_4_rtvec (2, int_two32_as_fp, DFmode));
+
+ int_two32_as_fp_vec = validize_mem (force_const_mem (V2DFmode,
+ rvt_int_two32_vec));
+
+ fp_value_hi_shifted_xmm = gen_reg_rtx (V2DFmode);
+ emit_move_insn (fp_value_hi_shifted_xmm, fp_value_hi_xmm);
+ emit_insn (gen_sse2_vmmulv2df3 (fp_value_hi_shifted_xmm,
+ fp_value_hi_shifted_xmm,
+ int_two32_as_fp_vec));
+
+ my_operands[0] = gen_reg_rtx (DFmode);
+ my_operands[1] = gen_rtx_SUBREG (SImode, input, 0);
+ (void) ix86_expand_convert_uns_SI2DF_sse (my_operands);
+
+ final_result_xmm = REG_P (target) && GET_MODE (target) == V2DFmode
+ ? target : gen_reg_rtx (V2DFmode);
+ emit_move_insn (final_result_xmm, gen_rtx_SUBREG (V2DFmode,
+ my_operands[0], 0));
+ emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm,
+ fp_value_hi_shifted_xmm));
+
+ if (!rtx_equal_p (target, final_result_xmm))
+ emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
+
+ return "";
+}
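+
+/* Scalar model (illustrative; utod is the unsigned-SImode sketch given
+   after ix86_expand_convert_uns_SI2DF_sse above):
+
+     static double lltod (long long x)
+     {
+       int hi = (int) (x >> 32);                  // signed high half
+       unsigned int lo = (unsigned int) x;
+       return (double) hi * 0x1.0p32              // cvtsi2sd, vmmul by 2**32
+              + utod (lo);                        // uns SI2DF on the low half
+     }
+*/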
+/* APPLE LOCAL end 4424891 */
+
+/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
+ Create a mask for the sign bit in MODE for an SSE register. If VECT is
+ true, then replicate the mask for all elements of the vector register.
+ If INVERT is true, then create a mask excluding the sign bit. */
+
+rtx
+ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
+{
+ enum machine_mode vec_mode;
+ HOST_WIDE_INT hi, lo;
+ int shift = 63;
+ rtvec v;
+ rtx mask;
+
+ /* Find the sign bit, sign extended to 2*HWI. */
+ if (mode == SFmode)
+ lo = 0x80000000, hi = lo < 0;
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << shift, hi = -1;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+
+ if (invert)
+ lo = ~lo, hi = ~hi;
+
+ /* Force this value into the low part of a fp vector constant. */
+ mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
+ mask = gen_lowpart (mode, mask);
+
+ if (mode == SFmode)
+ {
+ if (vect)
+ v = gen_rtvec (4, mask, mask, mask, mask);
+ else
+ v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ vec_mode = V4SFmode;
+ }
+ else
+ {
+ if (vect)
+ v = gen_rtvec (2, mask, mask);
+ else
+ v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
+ vec_mode = V2DFmode;
+ }
+
+ return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
+}
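+
+/* For SFmode the mask element is 0x80000000 (0x7fffffff when INVERT);
+   ABS and NEG then reduce to a single bitwise operation each:
+
+     x & ~signbit   // ABS: clear the sign bit
+     x ^  signbit   // NEG: flip the sign bit
+
+   as wired up in ix86_expand_fp_absneg_operator below.  */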
+
+/* Generate code for floating point ABS or NEG. */
+
+void
+ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx mask, set, use, clob, dst, src;
+ bool matching_memory;
+ bool use_sse = false;
+ bool vector_mode = VECTOR_MODE_P (mode);
+ enum machine_mode elt_mode = mode;
+
+ if (vector_mode)
+ {
+ elt_mode = GET_MODE_INNER (mode);
+ use_sse = true;
+ }
+ else if (TARGET_SSE_MATH)
+ use_sse = SSE_FLOAT_MODE_P (mode);
+
+ /* NEG and ABS performed with SSE use bitwise mask operations.
+ Create the appropriate mask now. */
+ if (use_sse)
+ mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
+ else
+ mask = NULL_RTX;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we don't have matching source
+ operands or we're using the x87, do things in registers. */
+ matching_memory = false;
+ if (MEM_P (dst))
+ {
+ if (use_sse && rtx_equal_p (dst, src))
+ matching_memory = true;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ if (vector_mode)
+ {
+ set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+ emit_insn (set);
+ }
+ else
+ {
+ set = gen_rtx_fmt_e (code, mode, src);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+ if (mask)
+ {
+ use = gen_rtx_USE (VOIDmode, mask);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, set, use, clob)));
+ }
+ else
+ emit_insn (set);
+ }
+
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Expand a copysign operation. Special case operand 0 being a constant. */
+
+void
+ix86_expand_copysign (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, op1, mask, nmask;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+
+ mode = GET_MODE (dest);
+ vmode = mode == SFmode ? V4SFmode : V2DFmode;
+
+ if (GET_CODE (op0) == CONST_DOUBLE)
+ {
+ rtvec v;
+
+ if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
+ op0 = simplify_unary_operation (ABS, mode, op0, mode);
+
+ if (op0 == CONST0_RTX (mode))
+ op0 = CONST0_RTX (vmode);
+ else
+ {
+ if (mode == SFmode)
+ v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ else
+ v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
+ op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
+ }
+
+ mask = ix86_build_signbit_mask (mode, 0, 0);
+
+ if (mode == SFmode)
+ emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
+ else
+ emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
+ }
+ else
+ {
+ nmask = ix86_build_signbit_mask (mode, 0, 1);
+ mask = ix86_build_signbit_mask (mode, 0, 0);
+
+ if (mode == SFmode)
+ emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
+ else
+ emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
+ be a constant, and so has already been expanded into a vector constant. */
+
+void
+ix86_split_copysign_const (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, op1, mask, x;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+ mask = operands[3];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ dest = simplify_gen_subreg (vmode, dest, mode, 0);
+ x = gen_rtx_AND (vmode, dest, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ if (op0 != CONST0_RTX (vmode))
+ {
+ x = gen_rtx_IOR (vmode, dest, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
+ so we have to do two masks. */
+
+void
+ix86_split_copysign_var (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, scratch, op0, op1, mask, nmask, x;
+
+ dest = operands[0];
+ scratch = operands[1];
+ op0 = operands[2];
+ op1 = operands[3];
+ nmask = operands[4];
+ mask = operands[5];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ if (rtx_equal_p (op0, op1))
+ {
+ /* Shouldn't happen often (it's useless, obviously), but when it does
+ we'd generate incorrect code if we continue below. */
+ emit_move_insn (dest, op0);
+ return;
+ }
+
+ if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
+ {
+ gcc_assert (REGNO (op1) == REGNO (scratch));
+
+ x = gen_rtx_AND (vmode, scratch, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ dest = mask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_NOT (vmode, dest);
+ x = gen_rtx_AND (vmode, x, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
+ {
+ x = gen_rtx_AND (vmode, scratch, mask);
+ }
+ else /* alternative 2,4 */
+ {
+ gcc_assert (REGNO (mask) == REGNO (scratch));
+ op1 = simplify_gen_subreg (vmode, op1, mode, 0);
+ x = gen_rtx_AND (vmode, scratch, op1);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
+ {
+ dest = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, nmask);
+ }
+ else /* alternative 3,4 */
+ {
+ gcc_assert (REGNO (nmask) == REGNO (dest));
+ dest = nmask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, op0);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+
+ x = gen_rtx_IOR (vmode, dest, scratch);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+}
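+
+/* Both splitters implement the same identity; an illustrative scalar
+   model for DFmode (sketch only; needs <string.h>):
+
+     static double copysign_sketch (double mag, double sgn)
+     {
+       unsigned long long m, s;
+       memcpy (&m, &mag, 8);
+       memcpy (&s, &sgn, 8);
+       m = (m & 0x7fffffffffffffffULL)     // mag & nmask
+           | (s & 0x8000000000000000ULL);  // sgn & mask
+       memcpy (&mag, &m, 8);
+       return mag;
+     }
+*/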
+
+/* Return TRUE or FALSE depending on whether the first SET in INSN
+   has source and destination with matching CC modes, and whether the
+   CC mode is at least as constrained as REQ_MODE.  */
+
+int
+ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
+{
+ rtx set;
+ enum machine_mode set_mode;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
+
+ set_mode = GET_MODE (SET_DEST (set));
+ switch (set_mode)
+ {
+ case CCNOmode:
+ if (req_mode != CCNOmode
+ && (req_mode != CCmode
+ || XEXP (SET_SRC (set), 1) != const0_rtx))
+ return 0;
+ break;
+ case CCmode:
+ if (req_mode == CCGCmode)
+ return 0;
+ /* FALLTHRU */
+ case CCGCmode:
+ if (req_mode == CCGOCmode || req_mode == CCNOmode)
+ return 0;
+ /* FALLTHRU */
+ case CCGOCmode:
+ if (req_mode == CCZmode)
+ return 0;
+ /* FALLTHRU */
+ case CCZmode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return (GET_MODE (SET_SRC (set)) == set_mode);
+}
+
+/* Generate insn patterns to do an integer compare of OPERANDS. */
+
+static rtx
+ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode cmpmode;
+ rtx tmp, flags;
+
+ cmpmode = SELECT_CC_MODE (code, op0, op1);
+ flags = gen_rtx_REG (cmpmode, FLAGS_REG);
+
+ /* This is very simple, but making the interface the same as in the
+ FP case makes the rest of the code easier. */
+ tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
+}
+
+/* Figure out whether to use ordered or unordered fp comparisons.
+ Return the appropriate mode to use. */
+
+enum machine_mode
+ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+ /* ??? In order to make all comparisons reversible, we do all comparisons
+ non-trapping when compiling for IEEE. Once gcc is able to distinguish
+   all forms of trapping and nontrapping comparisons, we can make inequality
+ comparisons trapping again, since it results in better code when using
+ FCOM based compares. */
+ return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
+}
+
+enum machine_mode
+ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
+{
+ if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
+ return ix86_fp_compare_mode (code);
+ switch (code)
+ {
+ /* Only zero flag is needed. */
+ case EQ: /* ZF=0 */
+ case NE: /* ZF!=0 */
+ return CCZmode;
+ /* Codes needing carry flag. */
+ case GEU: /* CF=0 */
+ case GTU: /* CF=0 & ZF=0 */
+ case LTU: /* CF=1 */
+ case LEU: /* CF=1 | ZF=1 */
+ return CCmode;
+      /* Codes possibly doable only with the sign flag when
+         comparing against zero.  */
+ case GE: /* SF=OF or SF=0 */
+ case LT: /* SF<>OF or SF=1 */
+ if (op1 == const0_rtx)
+ return CCGOCmode;
+ else
+ /* For other cases Carry flag is not required. */
+ return CCGCmode;
+      /* Codes doable only with the sign flag when comparing
+         against zero, but for which we lack a jump instruction,
+         so we need to use relational tests against overflow,
+         which thus needs to be zero.  */
+ case GT: /* ZF=0 & SF=OF */
+ case LE: /* ZF=1 | SF<>OF */
+ if (op1 == const0_rtx)
+ return CCNOmode;
+ else
+ return CCGCmode;
+    /* The strcmp pattern does (use flags), and combine may ask us for a
+       proper mode.  */
+ case USE:
+ return CCmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = FLAGS_REG;
+ *p2 = FPSR_REG;
+ return true;
+}
+
+/* If two condition code modes are compatible, return a condition code
+ mode which is compatible with both. Otherwise, return
+ VOIDmode. */
+
+static enum machine_mode
+ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
+{
+ if (m1 == m2)
+ return m1;
+
+ if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
+ return VOIDmode;
+
+ if ((m1 == CCGCmode && m2 == CCGOCmode)
+ || (m1 == CCGOCmode && m2 == CCGCmode))
+ return CCGCmode;
+
+ switch (m1)
+ {
+ default:
+ gcc_unreachable ();
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCZmode:
+ switch (m2)
+ {
+ default:
+ return VOIDmode;
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCZmode:
+ return CCmode;
+ }
+
+ case CCFPmode:
+ case CCFPUmode:
+ /* These are only compatible with themselves, which we already
+ checked above. */
+ return VOIDmode;
+ }
+}
+
+/* Return true if we should use an FCOMI instruction for this fp comparison. */
+
+int
+ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+ enum rtx_code swapped_code = swap_condition (code);
+ return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
+ || (ix86_fp_comparison_cost (swapped_code)
+ == ix86_fp_comparison_fcomi_cost (swapped_code)));
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to a fp comparison. The operands are updated in place; the new
+ comparison code is returned. */
+
+static enum rtx_code
+ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
+{
+ enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
+ rtx op0 = *pop0, op1 = *pop1;
+ enum machine_mode op_mode = GET_MODE (op0);
+ int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
+
+ /* All of the unordered compare instructions only work on registers.
+ The same is true of the fcomi compare instructions. The XFmode
+ compare instructions require registers except when comparing
+ against zero or when converting operand 1 from fixed point to
+ floating point. */
+
+ if (!is_sse
+ && (fpcmp_mode == CCFPUmode
+ || (op_mode == XFmode
+ && ! (standard_80387_constant_p (op0) == 1
+ || standard_80387_constant_p (op1) == 1)
+ && GET_CODE (op1) != FLOAT)
+ || ix86_use_fcomi_compare (code)))
+ {
+ op0 = force_reg (op_mode, op0);
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ {
+ /* %%% We only allow op1 in memory; op0 must be st(0). So swap
+ things around if they appear profitable, otherwise force op0
+ into a register. */
+
+ if (standard_80387_constant_p (op0) == 0
+ || (GET_CODE (op0) == MEM
+ && ! (standard_80387_constant_p (op1) == 0
+ || GET_CODE (op1) == MEM)))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ if (GET_CODE (op0) != REG)
+ op0 = force_reg (op_mode, op0);
+
+ if (CONSTANT_P (op1))
+ {
+ int tmp = standard_80387_constant_p (op1);
+ if (tmp == 0)
+ op1 = validize_mem (force_const_mem (op_mode, op1));
+ else if (tmp == 1)
+ {
+ if (TARGET_CMOVE)
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ op1 = force_reg (op_mode, op1);
+ }
+ }
+
+ /* Try to rearrange the comparison to make it cheaper. */
+ if (ix86_fp_comparison_cost (code)
+ > ix86_fp_comparison_cost (swap_condition (code))
+ && (GET_CODE (op1) == REG || !no_new_pseudos))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ if (GET_CODE (op0) != REG)
+ op0 = force_reg (op_mode, op0);
+ }
+
+ *pop0 = op0;
+ *pop1 = op1;
+ return code;
+}
+
+/* Convert the comparison codes we use to represent FP comparisons into the
+   integer codes that will result in a proper branch.  Return UNKNOWN if no
+   such code is available.  */
+
+enum rtx_code
+ix86_fp_compare_code_to_integer (enum rtx_code code)
+{
+ switch (code)
+ {
+ case GT:
+ return GTU;
+ case GE:
+ return GEU;
+    case ORDERED:
+    case UNORDERED:
+      return code;
+    case UNEQ:
+      return EQ;
+    case UNLT:
+      return LTU;
+    case UNLE:
+      return LEU;
+    case LTGT:
+      return NE;
+ default:
+ return UNKNOWN;
+ }
+}
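+
+/* The mapping works because fcomi (and fnstsw+sahf) leave the FP result
+   in CF/ZF exactly as an unsigned integer compare would: e.g. GT maps to
+   GTU ("ja": CF=0 & ZF=0), which an unordered result (CF=ZF=1) correctly
+   fails to satisfy.  */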
+
+/* Split comparison code CODE into comparisons we can do using branch
+   instructions.  BYPASS_CODE is the comparison code for a branch that
+   will branch around FIRST_CODE and SECOND_CODE.  If one of the
+   branches is not required, its value is set to UNKNOWN.
+   We never require more than two branches.  */
+
+void
+ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
+ enum rtx_code *first_code,
+ enum rtx_code *second_code)
+{
+ *first_code = code;
+ *bypass_code = UNKNOWN;
+ *second_code = UNKNOWN;
+
+ /* The fcomi comparison sets flags as follows:
+
+ cmp ZF PF CF
+ > 0 0 0
+ < 0 0 1
+ = 1 0 0
+ un 1 1 1 */
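+
+  /* For example, plain LT cannot use the CF=1 test directly: the
+     unordered result sets CF as well, so a NaN operand would wrongly
+     satisfy it.  That is why LT below becomes UNLT guarded by an
+     UNORDERED bypass branch.  */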
+
+ switch (code)
+ {
+ case GT: /* GTU - CF=0 & ZF=0 */
+ case GE: /* GEU - CF=0 */
+ case ORDERED: /* PF=0 */
+ case UNORDERED: /* PF=1 */
+ case UNEQ: /* EQ - ZF=1 */
+ case UNLT: /* LTU - CF=1 */
+ case UNLE: /* LEU - CF=1 | ZF=1 */
+ case LTGT: /* EQ - ZF=0 */
+ break;
+ case LT: /* LTU - CF=1 - fails on unordered */
+ *first_code = UNLT;
+ *bypass_code = UNORDERED;
+ break;
+ case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
+ *first_code = UNLE;
+ *bypass_code = UNORDERED;
+ break;
+ case EQ: /* EQ - ZF=1 - fails on unordered */
+ *first_code = UNEQ;
+ *bypass_code = UNORDERED;
+ break;
+ case NE: /* NE - ZF=0 - fails on unordered */
+ *first_code = LTGT;
+ *second_code = UNORDERED;
+ break;
+ case UNGE: /* GEU - CF=0 - fails on unordered */
+ *first_code = GE;
+ *second_code = UNORDERED;
+ break;
+ case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
+ *first_code = GT;
+ *second_code = UNORDERED;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!TARGET_IEEE_FP)
+ {
+ *second_code = UNKNOWN;
+ *bypass_code = UNKNOWN;
+ }
+}
+
+/* Return the cost of a comparison done with fcom + arithmetic operations
+   on AX.  All following functions use the number of instructions as a
+   cost metric.  In the future this should be tweaked to compute bytes for
+   optimize_size and take into account the performance of various
+   instructions on various CPUs.  */
+static int
+ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
+{
+ if (!TARGET_IEEE_FP)
+ return 4;
+ /* The cost of code output by ix86_expand_fp_compare. */
+ switch (code)
+ {
+    case UNLE:
+    case UNLT:
+    case LTGT:
+    case GT:
+    case GE:
+    case UNORDERED:
+    case ORDERED:
+    case UNEQ:
+      return 4;
+    case LT:
+    case NE:
+    case EQ:
+    case UNGE:
+      return 5;
+    case LE:
+    case UNGT:
+      return 6;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return cost of comparison done using fcomi operation.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_fcomi_cost (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ /* Return arbitrarily high cost when instruction is not supported - this
+ prevents gcc from using it. */
+ if (!TARGET_CMOVE)
+ return 1024;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
+}
+
+/* Return cost of comparison done using sahf operation.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_sahf_cost (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ /* Return arbitrarily high cost when instruction is not preferred - this
+     prevents gcc from using it.  */
+ if (!TARGET_USE_SAHF && !optimize_size)
+ return 1024;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
+}
+
+/* Compute cost of the comparison done using any method.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_cost (enum rtx_code code)
+{
+ int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
+ int min;
+
+ fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
+ sahf_cost = ix86_fp_comparison_sahf_cost (code);
+
+ min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
+ if (min > sahf_cost)
+ min = sahf_cost;
+ if (min > fcomi_cost)
+ min = fcomi_cost;
+ return min;
+}
+
+/* Generate insn patterns to do a floating point compare of OPERANDS. */
+
+static rtx
+ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
+ rtx *second_test, rtx *bypass_test)
+{
+ enum machine_mode fpcmp_mode, intcmp_mode;
+ rtx tmp, tmp2;
+ int cost = ix86_fp_comparison_cost (code);
+ enum rtx_code bypass_code, first_code, second_code;
+
+ fpcmp_mode = ix86_fp_compare_mode (code);
+ code = ix86_prepare_fp_compare_args (code, &op0, &op1);
+
+ if (second_test)
+ *second_test = NULL_RTX;
+ if (bypass_test)
+ *bypass_test = NULL_RTX;
+
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+
+ /* Do fcomi/sahf based test when profitable. */
+ if ((bypass_code == UNKNOWN || bypass_test)
+ && (second_code == UNKNOWN || second_test)
+ && ix86_fp_comparison_arithmetics_cost (code) > cost)
+ {
+ if (TARGET_CMOVE)
+ {
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
+ tmp);
+ emit_insn (tmp);
+ }
+ else
+ {
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
+ emit_insn (gen_x86_sahf_1 (scratch));
+ }
+
+ /* The FP codes work out to act like unsigned. */
+ intcmp_mode = fpcmp_mode;
+ code = first_code;
+ if (bypass_code != UNKNOWN)
+ *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+ if (second_code != UNKNOWN)
+ *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+ }
+ else
+ {
+ /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
+
+ /* In the unordered case, we have to check C2 for NaN's, which
+ doesn't happen to work out to anything nice combination-wise.
+ So do some bit twiddling on the value we've got in AH to come
+ up with an appropriate set of condition codes. */
+
+ intcmp_mode = CCNOmode;
+ switch (code)
+ {
+ case GT:
+ case UNGT:
+ if (code == GT || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
+ intcmp_mode = CCmode;
+ code = GEU;
+ }
+ break;
+ case LT:
+ case UNLT:
+ if (code == LT && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
+ code = NE;
+ }
+ break;
+ case GE:
+ case UNGE:
+ if (code == GE || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x01)));
+ code = NE;
+ }
+ break;
+ case LE:
+ case UNLE:
+ if (code == LE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = LTU;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = NE;
+ }
+ break;
+ case EQ:
+ case UNEQ:
+ if (code == EQ && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+	  else
+	    {
+	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+	      code = NE;
+	    }
+	  break;
+ case NE:
+ case LTGT:
+ if (code == NE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x40)));
+ code = NE;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = EQ;
+ }
+ break;
+
+ case UNORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = NE;
+ break;
+ case ORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+}
+
+rtx
+ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
+{
+ rtx op0, op1, ret;
+ op0 = ix86_compare_op0;
+ op1 = ix86_compare_op1;
+
+ if (second_test)
+ *second_test = NULL_RTX;
+ if (bypass_test)
+ *bypass_test = NULL_RTX;
+
+ if (ix86_compare_emitted)
+ {
+ ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
+ ix86_compare_emitted = NULL_RTX;
+ }
+ else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
+ ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ second_test, bypass_test);
+ else
+ ret = ix86_expand_int_compare (code, op0, op1);
+
+ return ret;
+}
+
+/* Return true if the CODE will result in a nontrivial jump sequence.  */
+bool
+ix86_fp_jump_nontrivial_p (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ if (!TARGET_CMOVE)
+ return true;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return bypass_code != UNKNOWN || second_code != UNKNOWN;
+}
+
+void
+ix86_expand_branch (enum rtx_code code, rtx label)
+{
+ rtx tmp;
+
+ /* If we have emitted a compare insn, go straight to simple.
+ ix86_expand_compare won't emit anything if ix86_compare_emitted
+     is non-NULL.  */
+ if (ix86_compare_emitted)
+ goto simple;
+
+ switch (GET_MODE (ix86_compare_op0))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ simple:
+ tmp = ix86_expand_compare (code, NULL, NULL);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ return;
+
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ {
+ rtvec vec;
+ int use_fcomi;
+ enum rtx_code bypass_code, first_code, second_code;
+
+ code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
+ &ix86_compare_op1);
+
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+
+	/* Check whether we will use the natural sequence with one jump.  If
+	   so, we can expand the jump early.  Otherwise delay expansion by
+	   creating a compound insn so as not to confuse the optimizers.  */
+ if (bypass_code == UNKNOWN && second_code == UNKNOWN
+ && TARGET_CMOVE)
+ {
+ ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx, NULL_RTX, NULL_RTX);
+ }
+ else
+ {
+ tmp = gen_rtx_fmt_ee (code, VOIDmode,
+ ix86_compare_op0, ix86_compare_op1);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+ use_fcomi = ix86_use_fcomi_compare (code);
+ vec = rtvec_alloc (3 + !use_fcomi);
+ RTVEC_ELT (vec, 0) = tmp;
+ RTVEC_ELT (vec, 1)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
+ RTVEC_ELT (vec, 2)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
+ if (! use_fcomi)
+ RTVEC_ELT (vec, 3)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
+
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+ }
+ return;
+ }
+
+ case DImode:
+      if (TARGET_64BIT)
+	goto simple;
+      /* FALLTHRU */
+ case TImode:
+      /* Expand DImode/TImode branch into multiple compare+branch.  */
+ {
+ rtx lo[2], hi[2], label2;
+ enum rtx_code code1, code2, code3;
+ enum machine_mode submode;
+
+ if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
+ {
+ tmp = ix86_compare_op0;
+ ix86_compare_op0 = ix86_compare_op1;
+ ix86_compare_op1 = tmp;
+ code = swap_condition (code);
+ }
+ if (GET_MODE (ix86_compare_op0) == DImode)
+ {
+ split_di (&ix86_compare_op0, 1, lo+0, hi+0);
+ split_di (&ix86_compare_op1, 1, lo+1, hi+1);
+ submode = SImode;
+ }
+ else
+ {
+ split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
+ split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
+ submode = DImode;
+ }
+
+ /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
+ avoid two branches. This costs one extra insn, so disable when
+ optimizing for size. */
+
+ if ((code == EQ || code == NE)
+ && (!optimize_size
+ || hi[1] == const0_rtx || lo[1] == const0_rtx))
+ {
+ rtx xor0, xor1;
+
+ xor1 = hi[0];
+ if (hi[1] != const0_rtx)
+ xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ xor0 = lo[0];
+ if (lo[1] != const0_rtx)
+ xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ tmp = expand_binop (submode, ior_optab, xor1, xor0,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ ix86_compare_op0 = tmp;
+ ix86_compare_op1 = const0_rtx;
+ ix86_expand_branch (code, label);
+ return;
+ }
+
+	  /* Otherwise, if we are doing a less-than or greater-or-equal-than
+	     comparison, op1 is a constant, and the low word is zero, then
+	     we can just examine the high word.  */
+
+ if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
+ switch (code)
+ {
+ case LT: case LTU: case GE: case GEU:
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+ ix86_expand_branch (code, label);
+ return;
+ default:
+ break;
+ }
+
+ /* Otherwise, we need two or three jumps. */
+
+ label2 = gen_label_rtx ();
+
+ code1 = code;
+ code2 = swap_condition (code);
+ code3 = unsigned_condition (code);
+
+ switch (code)
+ {
+ case LT: case GT: case LTU: case GTU:
+ break;
+
+ case LE: code1 = LT; code2 = GT; break;
+ case GE: code1 = GT; code2 = LT; break;
+ case LEU: code1 = LTU; code2 = GTU; break;
+ case GEU: code1 = GTU; code2 = LTU; break;
+
+ case EQ: code1 = UNKNOWN; code2 = NE; break;
+ case NE: code2 = UNKNOWN; break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /*
+ * a < b =>
+ * if (hi(a) < hi(b)) goto true;
+ * if (hi(a) > hi(b)) goto false;
+ * if (lo(a) < lo(b)) goto true;
+ * false:
+ */
+
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+
+ if (code1 != UNKNOWN)
+ ix86_expand_branch (code1, label);
+ if (code2 != UNKNOWN)
+ ix86_expand_branch (code2, label2);
+
+ ix86_compare_op0 = lo[0];
+ ix86_compare_op1 = lo[1];
+ ix86_expand_branch (code3, label);
+
+ if (code2 != UNKNOWN)
+ emit_label (label2);
+ return;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Split branch based on floating point condition. */
+void
+ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
+ rtx target1, rtx target2, rtx tmp, rtx pushed)
+{
+ rtx second, bypass;
+ rtx label = NULL_RTX;
+ rtx condition;
+ int bypass_probability = -1, second_probability = -1, probability = -1;
+ rtx i;
+
+ if (target2 != pc_rtx)
+ {
+ rtx tmp = target2;
+ code = reverse_condition_maybe_unordered (code);
+ target2 = target1;
+ target1 = tmp;
+ }
+
+ condition = ix86_expand_fp_compare (code, op1, op2,
+ tmp, &second, &bypass);
+
+ /* Remove pushed operand from stack. */
+ if (pushed)
+ ix86_free_from_memory (GET_MODE (pushed));
+
+ if (split_branch_probability >= 0)
+ {
+      /* Distribute the probabilities across the jumps.
+	 Assume BYPASS and SECOND always test for UNORDERED.  */
+ probability = split_branch_probability;
+
+      /* A value of 1 is low enough that the probability need not be
+	 updated.  Later we may run some experiments and see whether
+	 unordered values are more frequent in practice.  */
+ if (bypass)
+ bypass_probability = 1;
+ if (second)
+ second_probability = 1;
+ }
+ if (bypass != NULL_RTX)
+ {
+ label = gen_label_rtx ();
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ bypass,
+ gen_rtx_LABEL_REF (VOIDmode,
+ label),
+ pc_rtx)));
+ if (bypass_probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (bypass_probability),
+ REG_NOTES (i));
+ }
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ condition, target1, target2)));
+ if (probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (probability),
+ REG_NOTES (i));
+ if (second != NULL_RTX)
+ {
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
+ target2)));
+ if (second_probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (second_probability),
+ REG_NOTES (i));
+ }
+ if (label != NULL_RTX)
+ emit_label (label);
+}
+
+int
+ix86_expand_setcc (enum rtx_code code, rtx dest)
+{
+ rtx ret, tmp, tmpreg, equiv;
+ rtx second_test, bypass_test;
+
+ if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
+ return 0; /* FAIL */
+
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ ret = ix86_expand_compare (code, &second_test, &bypass_test);
+ PUT_MODE (ret, QImode);
+
+ tmp = dest;
+ tmpreg = dest;
+
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
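+
+  /* Some FP comparisons expand into two flag tests.  A sketch of the
+     merge below: materialize the extra test with a second setcc, then
+     combine them, AND for the (reversed) bypass test, IOR for the
+     second test.  */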
+ if (bypass_test || second_test)
+ {
+ rtx test = second_test;
+ int bypass = 0;
+ rtx tmp2 = gen_reg_rtx (QImode);
+ if (bypass_test)
+ {
+ gcc_assert (!second_test);
+ test = bypass_test;
+ bypass = 1;
+ PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
+ }
+ PUT_MODE (test, QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
+
+ if (bypass)
+ emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
+ else
+ emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
+ }
+
+ /* Attach a REG_EQUAL note describing the comparison result. */
+ if (ix86_compare_op0 && ix86_compare_op1)
+ {
+ equiv = simplify_gen_relational (code, QImode,
+ GET_MODE (ix86_compare_op0),
+ ix86_compare_op0, ix86_compare_op1);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
+ }
+
+ return 1; /* DONE */
+}
+
+/* Expand a comparison setting or clearing the carry flag.  Return true
+   when successful, and set *POP to the comparison operation.  */
+static bool
+ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
+{
+ enum machine_mode mode =
+ GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
+
+  /* Do not handle double-word compares; they go through a special path.
+     FP compares are handled separately below.  */
+ if (mode == (TARGET_64BIT ? TImode : DImode))
+ return false;
+ if (FLOAT_MODE_P (mode))
+ {
+ rtx second_test = NULL, bypass_test = NULL;
+ rtx compare_op, compare_seq;
+
+      /* Shortcut: the following common codes never translate into
+	 carry-flag compares.  */
+ if (code == EQ || code == NE || code == UNEQ || code == LTGT
+ || code == ORDERED || code == UNORDERED)
+ return false;
+
+      /* These comparisons require the zero flag; swap the operands so
+	 that they no longer do.  */
+ if ((code == GT || code == UNLE || code == LE || code == UNGT)
+ && !TARGET_IEEE_FP)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = swap_condition (code);
+ }
+
+      /* Try to expand the comparison and verify that we end up with a
+	 carry-flag-based comparison.  This fails only when we decide to
+	 expand the comparison using arithmetic, which is not a common
+	 scenario.  */
+ start_sequence ();
+ compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ if (second_test || bypass_test)
+ return false;
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
+ else
+ code = GET_CODE (compare_op);
+ if (code != LTU && code != GEU)
+ return false;
+ emit_insn (compare_seq);
+ *pop = compare_op;
+ return true;
+ }
+ if (!INTEGRAL_MODE_P (mode))
+ return false;
+ switch (code)
+ {
+ case LTU:
+ case GEU:
+ break;
+
+ /* Convert a==0 into (unsigned)a<1. */
+ case EQ:
+ case NE:
+ if (op1 != const0_rtx)
+ return false;
+ op1 = const1_rtx;
+ code = (code == EQ ? LTU : GEU);
+ break;
+
+    /* Convert a>b into b<a or a>=b+1.  */
+ case GTU:
+ case LEU:
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
+	  /* Bail out on overflow.  We could still swap the operands, but
+	     that would force loading the constant into a register.  */
+ if (op1 == const0_rtx
+ || !x86_64_immediate_operand (op1, GET_MODE (op1)))
+ return false;
+ code = (code == GTU ? GEU : LTU);
+ }
+ else
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ code = (code == GTU ? LTU : GEU);
+ }
+ break;
+
+ /* Convert a>=0 into (unsigned)a<0x80000000. */
+ case LT:
+ case GE:
+ if (mode == DImode || op1 != const0_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LT ? GEU : LTU);
+ break;
+ case LE:
+ case GT:
+ if (mode == DImode || op1 != constm1_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LE ? GEU : LTU);
+ break;
+
+ default:
+ return false;
+ }
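+
+  /* Illustration: with a constant operand, "a <= 4" (LEU) has just been
+     rewritten as "a < 5" (LTU), which the cmp instruction answers
+     directly in the carry flag.  */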
+  /* Swapping the operands may cause a constant to appear as the first
+     operand.  */
+ if (!nonimmediate_operand (op0, VOIDmode))
+ {
+ if (no_new_pseudos)
+ return false;
+ op0 = force_reg (mode, op0);
+ }
+ ix86_compare_op0 = op0;
+ ix86_compare_op1 = op1;
+ *pop = ix86_expand_compare (code, NULL, NULL);
+ gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
+ return true;
+}
+
+int
+ix86_expand_int_movcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]), compare_code;
+ rtx compare_seq, compare_op;
+ rtx second_test, bypass_test;
+ enum machine_mode mode = GET_MODE (operands[0]);
+  bool sign_bit_compare_p = false;
+
+ start_sequence ();
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ compare_code = GET_CODE (compare_op);
+
+ if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
+ || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
+ sign_bit_compare_p = true;
+
+ /* Don't attempt mode expansion here -- if we had to expand 5 or 6
+ HImode insns, we'd be swallowed in word prefix ops. */
+
+ if ((mode != HImode || TARGET_FAST_PREFIX)
+ && (mode != (TARGET_64BIT ? TImode : DImode))
+ && GET_CODE (operands[2]) == CONST_INT
+ && GET_CODE (operands[3]) == CONST_INT)
+ {
+ rtx out = operands[0];
+ HOST_WIDE_INT ct = INTVAL (operands[2]);
+ HOST_WIDE_INT cf = INTVAL (operands[3]);
+ HOST_WIDE_INT diff;
+
+ diff = ct - cf;
+      /* Sign-bit compares are better done using shifts than using sbb.  */
+ if (sign_bit_compare_p
+ || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ {
+ /* Detect overlap between destination and compare sources. */
+ rtx tmp = out;
+
+ if (!sign_bit_compare_p)
+ {
+ bool fpcmp = false;
+
+ compare_code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ compare_code = ix86_fp_compare_code_to_integer (compare_code);
+ }
+
+	      /* To simplify the rest of the code, restrict to the GEU case.  */
+ if (compare_code == LTU)
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ else
+ {
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ diff = ct - cf;
+
+ if (reg_overlap_mentioned_p (out, ix86_compare_op0)
+ || reg_overlap_mentioned_p (out, ix86_compare_op1))
+ tmp = gen_reg_rtx (mode);
+
+ if (mode == DImode)
+ emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
+ else
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
+ }
+ else
+ {
+ if (code == GT || code == GE)
+ code = reverse_condition (code);
+ else
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ diff = ct - cf;
+ }
+ tmp = emit_store_flag (tmp, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
+
+ if (diff == 1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [addl dest, ct]
+ *
+ * Size 5 - 8.
+ */
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (cf == -1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * orl $ct, dest
+ *
+ * Size 8.
+ */
+ tmp = expand_simple_binop (mode, IOR,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (diff == -1 && ct)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * notl dest
+ * [addl dest, cf]
+ *
+ * Size 8 - 11.
+ */
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ if (cf)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (cf),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [notl dest]
+ * andl cf - ct, dest
+ * [addl dest, ct]
+ *
+ * Size 8 - 11.
+ */
+
+ if (cf == 0)
+ {
+ cf = ct;
+ ct = 0;
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ }
+
+ tmp = expand_simple_binop (mode, AND,
+ copy_rtx (tmp),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+
+ if (!rtx_equal_p (tmp, out))
+ emit_move_insn (copy_rtx (out), copy_rtx (tmp));
+
+ return 1; /* DONE */
+ }
+
+ if (diff < 0)
+ {
+ HOST_WIDE_INT tmp;
+ tmp = ct, ct = cf, cf = tmp;
+ diff = -diff;
+ if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
+ {
+	      /* We may be reversing an unordered compare to a normal
+		 compare, which is not valid in general (we might convert a
+		 non-trapping condition into a trapping one); however, on
+		 i386 we currently emit all comparisons unordered.  */
+ compare_code = reverse_condition_maybe_unordered (compare_code);
+ code = reverse_condition_maybe_unordered (code);
+ }
+ else
+ {
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ }
+
+ compare_code = UNKNOWN;
+ if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
+ && GET_CODE (ix86_compare_op1) == CONST_INT)
+ {
+ if (ix86_compare_op1 == const0_rtx
+ && (code == LT || code == GE))
+ compare_code = code;
+ else if (ix86_compare_op1 == constm1_rtx)
+ {
+ if (code == LE)
+ compare_code = LT;
+ else if (code == GT)
+ compare_code = GE;
+ }
+ }
+
+ /* Optimize dest = (op0 < 0) ? -1 : cf. */
+ if (compare_code != UNKNOWN
+ && GET_MODE (ix86_compare_op0) == GET_MODE (out)
+ && (cf == -1 || ct == -1))
+ {
+ /* If lea code below could be used, only optimize
+ if it results in a 2 insn sequence. */
+
+ if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ || (compare_code == LT && ct == -1)
+ || (compare_code == GE && cf == -1))
+ {
+ /*
+ * notl op1 (if necessary)
+ * sarl $31, op1
+ * orl cf, op1
+ */
+ if (ct != -1)
+ {
+ cf = ct;
+ ct = -1;
+ code = reverse_condition (code);
+ }
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+
+ out = expand_simple_binop (mode, IOR,
+ out, GEN_INT (cf),
+ out, 1, OPTAB_DIRECT);
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+
+ return 1; /* DONE */
+ }
+ }
+
+
+ if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
+ && (mode != DImode
+ || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
+ {
+ /*
+ * xorl dest,dest
+ * cmpl op1,op2
+ * setcc dest
+ * lea cf(dest*(ct-cf)),dest
+ *
+ * Size 14.
+ *
+ * This also catches the degenerate setcc-only case.
+ */
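+
+	  /*
+	   * Sketch for diff == 5 (i.e. ct - cf == 5): the value is
+	   * rebuilt as cf + dest*4 + dest, a MULT by (diff & ~1) plus
+	   * one extra PLUS for the odd bit, matching a single lea.
+	   */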
+
+ rtx tmp;
+ int nops;
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, 1);
+
+ nops = 0;
+      /* On x86_64 the lea instruction operates on Pmode, so we need
+	 to do the arithmetic in the proper mode to match.  */
+ if (diff == 1)
+ tmp = copy_rtx (out);
+ else
+ {
+ rtx out1;
+ out1 = copy_rtx (out);
+ tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
+ nops++;
+ if (diff & 1)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, out1);
+ nops++;
+ }
+ }
+ if (cf != 0)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
+ nops++;
+ }
+ if (!rtx_equal_p (tmp, out))
+ {
+ if (nops == 1)
+ out = force_operand (tmp, copy_rtx (out));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
+ }
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+
+ /*
+ * General case: Jumpful:
+ * xorl dest,dest cmpl op1, op2
+ * cmpl op1, op2 movl ct, dest
+ * setcc dest jcc 1f
+ * decl dest movl cf, dest
+ * andl (cf-ct),dest 1:
+ * addl ct,dest
+ *
+ * Size 20. Size 14.
+ *
+ * This is reasonably steep, but branch mispredict costs are
+ * high on modern cpus, so consider failing only if optimizing
+ * for space.
+ */
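+
+  /*
+   * Worked example (illustrative): ct = 7, cf = 3.  setcc gives 1/0,
+   * decl gives 0/-1, andl $-4 gives 0/-4, addl $7 gives 7/3.
+   */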
+
+ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ && BRANCH_COST >= 2)
+ {
+ if (cf == 0)
+ {
+ cf = ct;
+ ct = 0;
+ if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
+	    /* We may be reversing an unordered compare to a normal compare,
+	       which is not valid in general (we might convert a non-trapping
+	       condition into a trapping one); however, on i386 we currently
+	       emit all comparisons unordered.  */
+ code = reverse_condition_maybe_unordered (code);
+ else
+ {
+ code = reverse_condition (code);
+ if (compare_code != UNKNOWN)
+ compare_code = reverse_condition (compare_code);
+ }
+ }
+
+ if (compare_code != UNKNOWN)
+ {
+ /* notl op1 (if needed)
+ sarl $31, op1
+ andl (cf-ct), op1
+ addl ct, op1
+
+ For x < 0 (resp. x <= -1) there will be no notl,
+ so if possible swap the constants to get rid of the
+ complement.
+ True/false will be -1/0 while code below (store flag
+ followed by decrement) is 0/-1, so the constants need
+ to be exchanged once more. */
+
+ if (compare_code == GE || !cf)
+ {
+ code = reverse_condition (code);
+ compare_code = LT;
+ }
+ else
+ {
+ HOST_WIDE_INT tmp = cf;
+ cf = ct;
+ ct = tmp;
+ }
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
+ else
+ {
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, 1);
+
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ }
+
+ out = expand_simple_binop (mode, AND, copy_rtx (out),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (ct)
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+ }
+
+ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ {
+ /* Try a few things more with specific constants and a variable. */
+
+ optab op;
+ rtx var, orig_out, out, tmp;
+
+ if (BRANCH_COST <= 2)
+ return 0; /* FAIL */
+
+      /* If one of the two operands is an interesting constant, load the
+	 two constants via a recursive call and mask the variable in with
+	 a logical operation.  */
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ var = operands[3];
+ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
+ operands[3] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
+ operands[3] = const0_rtx, op = ior_optab;
+ else
+ return 0; /* FAIL */
+ }
+ else if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ var = operands[2];
+ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
+ operands[2] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
+ operands[2] = const0_rtx, op = ior_optab;
+ else
+ return 0; /* FAIL */
+ }
+ else
+ return 0; /* FAIL */
+
+ orig_out = operands[0];
+ tmp = gen_reg_rtx (mode);
+ operands[0] = tmp;
+
+ /* Recurse to get the constant loaded. */
+ if (ix86_expand_int_movcc (operands) == 0)
+ return 0; /* FAIL */
+
+ /* Mask in the interesting variable. */
+ out = expand_binop (mode, op, var, tmp, orig_out, 0,
+ OPTAB_WIDEN);
+ if (!rtx_equal_p (out, orig_out))
+ emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+
+ /*
+ * For comparison with above,
+ *
+ * movl cf,dest
+ * movl ct,tmp
+ * cmpl op1,op2
+ * cmovcc tmp,dest
+ *
+ * Size 15.
+ */
+
+ if (! nonimmediate_operand (operands[2], mode))
+ operands[2] = force_reg (mode, operands[2]);
+ if (! nonimmediate_operand (operands[3], mode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[3]);
+ operands[3] = tmp;
+ }
+ if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[2]);
+ operands[2] = tmp;
+ }
+
+ if (! register_operand (operands[2], VOIDmode)
+ && (mode == QImode
+ || ! register_operand (operands[3], VOIDmode)))
+ operands[2] = force_reg (mode, operands[2]);
+
+ if (mode == QImode
+ && ! register_operand (operands[3], VOIDmode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ emit_insn (compare_seq);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode,
+ compare_op, operands[2],
+ operands[3])));
+ if (bypass_test)
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_IF_THEN_ELSE (mode,
+ bypass_test,
+ copy_rtx (operands[3]),
+ copy_rtx (operands[0]))));
+ if (second_test)
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_IF_THEN_ELSE (mode,
+ second_test,
+ copy_rtx (operands[2]),
+ copy_rtx (operands[0]))));
+
+ return 1; /* DONE */
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to an sse comparison with a mask result. Thus we differ a bit from
+ ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+ The DEST operand exists to help determine whether to commute commutative
+ operators. The POP0/POP1 operands are updated in place. The new
+ comparison code is returned, or UNKNOWN if not implementable. */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+ rtx *pop0, rtx *pop1)
+{
+ rtx tmp;
+
+ switch (code)
+ {
+ case LTGT:
+ case UNEQ:
+ /* We have no LTGT as an operator. We could implement it with
+ NE & ORDERED, but this requires an extra temporary. It's
+ not clear that it's worth it. */
+ return UNKNOWN;
+
+ case LT:
+ case LE:
+ case UNGT:
+ case UNGE:
+ /* These are supported directly. */
+ break;
+
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ /* For commutative operators, try to canonicalize the destination
+ operand to be first in the comparison - this helps reload to
+ avoid extra moves. */
+ if (!dest || !rtx_equal_p (dest, *pop1))
+ break;
+ /* FALLTHRU */
+
+ case GE:
+ case GT:
+ case UNLE:
+ case UNLT:
+ /* These are not supported directly. Swap the comparison operands
+ to transform into something that is supported. */
+ tmp = *pop0;
+ *pop0 = *pop1;
+ *pop1 = tmp;
+ code = swap_condition (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+ semantics. Note that this is IEEE safe, as long as we don't
+ interchange the operands.
+
+ Returns FALSE if this conditional move doesn't match a MIN/MAX,
+ and TRUE if the operation is successful and instructions are emitted. */
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+ rtx cmp_op1, rtx if_true, rtx if_false)
+{
+ enum machine_mode mode;
+ bool is_min;
+ rtx tmp;
+
+ if (code == LT)
+ ;
+ else if (code == UNGE)
+ {
+ tmp = if_true;
+ if_true = if_false;
+ if_false = tmp;
+ }
+ else
+ return false;
+
+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+ is_min = true;
+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+ is_min = false;
+ else
+ return false;
+
+ mode = GET_MODE (dest);
+
+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+ but MODE may be a vector mode and thus not appropriate. */
+ if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+ {
+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+ rtvec v;
+
+ if_true = force_reg (mode, if_true);
+ v = gen_rtvec (2, if_true, if_false);
+ tmp = gen_rtx_UNSPEC (mode, v, u);
+ }
+ else
+ {
+ code = is_min ? SMIN : SMAX;
+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ return true;
+}
+
+/* Expand an sse vector comparison. Return the register with the result. */
+
+static rtx
+ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+ rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx x;
+
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ if (optimize
+ || reg_overlap_mentioned_p (dest, op_true)
+ || reg_overlap_mentioned_p (dest, op_false))
+ dest = gen_reg_rtx (mode);
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ return dest;
+}
+
+/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
+ operations. This is used for both scalar and vector conditional moves. */
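+
+/* In the general case this computes the familiar mask-select identity
+   dest = (cmp & op_true) | (~cmp & op_false); the branches below take a
+   single-AND shortcut when either arm is already zero.  */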
+
+static void
+ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx t2, t3, x;
+
+ if (op_false == CONST0_RTX (mode))
+ {
+ op_true = force_reg (mode, op_true);
+ x = gen_rtx_AND (mode, cmp, op_true);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (op_true == CONST0_RTX (mode))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, cmp);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Expand a floating-point conditional move. Return true if successful. */
+
+int
+ix86_expand_fp_movcc (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx tmp, compare_op, second_test, bypass_test;
+
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ {
+ enum machine_mode cmode;
+
+      /* Since there is no cmove for SSE registers, don't force bad
+	 register allocation just to gain access to one.  Deny movcc when
+	 the comparison mode doesn't match the move mode.  */
+ cmode = GET_MODE (ix86_compare_op0);
+ if (cmode == VOIDmode)
+ cmode = GET_MODE (ix86_compare_op1);
+ if (cmode != mode)
+ return 0;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &ix86_compare_op0,
+ &ix86_compare_op1);
+ if (code == UNKNOWN)
+ return 0;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2],
+ operands[3]))
+ return 1;
+
+ tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2], operands[3]);
+ ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
+ return 1;
+ }
+
+ /* The floating point conditional move instructions don't directly
+ support conditions resulting from a signed integer comparison. */
+
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+
+ if (!fcmov_comparison_operator (compare_op, VOIDmode))
+ {
+ gcc_assert (!second_test && !bypass_test);
+ tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (code, tmp);
+ code = NE;
+ ix86_compare_op0 = tmp;
+ ix86_compare_op1 = const0_rtx;
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+ }
+ if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[3]);
+ operands[3] = tmp;
+ }
+ if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[2]);
+ operands[2] = tmp;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, compare_op,
+ operands[2], operands[3])));
+ if (bypass_test)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, bypass_test,
+ operands[3], operands[0])));
+ if (second_test)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, second_test,
+ operands[2], operands[0])));
+
+ return 1;
+}
+
+/* Expand a floating-point vector conditional move; a vcond operation
+ rather than a movcc operation. */
+
+bool
+ix86_expand_fp_vcond (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[3]);
+ rtx cmp;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &operands[4], &operands[5]);
+ if (code == UNKNOWN)
+ return false;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
+ operands[5], operands[1], operands[2]))
+ return true;
+
+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
+ operands[1], operands[2]);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+}
+
+/* Expand a signed integral vector conditional move. */
+
+bool
+ix86_expand_int_vcond (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate = false;
+ rtx x, cop0, cop1;
+
+ cop0 = operands[4];
+ cop1 = operands[5];
+
+ /* Canonicalize the comparison to EQ, GT, GTU. */
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GTU:
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = cop0, cop0 = cop1, cop1 = x;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Only SSE4.1/SSE4.2 supports V2DImode. */
+ if (mode == V2DImode)
+ {
+ switch (code)
+ {
+ case EQ:
+ /* SSE4.1 supports EQ. */
+ if (!TARGET_SSE4_1)
+ return false;
+ break;
+
+ case GT:
+ case GTU:
+ /* SSE4.2 supports GT/GTU. */
+ if (!TARGET_SSE4_2)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ /* Unsigned parallel compare is not supported by the hardware. Play some
+ tricks to turn this into a signed comparison against 0. */
+ if (code == GTU)
+ {
+ cop0 = force_reg (mode, cop0);
+
+ switch (mode)
+ {
+ case V4SImode:
+ {
+ rtx t1, t2, mask;
+
+ /* Perform a parallel modulo subtraction. */
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_subv4si3 (t1, cop0, cop1));
+
+ /* Extract the original sign bit of op0. */
+ mask = GEN_INT (-0x80000000);
+ mask = gen_rtx_CONST_VECTOR (mode,
+ gen_rtvec (4, mask, mask, mask, mask));
+ mask = force_reg (mode, mask);
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_andv4si3 (t2, cop0, mask));
+
+ /* XOR it back into the result of the subtraction. This results
+ in the sign bit set iff we saw unsigned underflow. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_xorv4si3 (x, t1, t2));
+
+ code = GT;
+ }
+ break;
+
+ case V16QImode:
+ case V8HImode:
+ /* Perform a parallel unsigned saturating subtraction. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
+
+ code = EQ;
+ negate = !negate;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
+ }
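+
+  /* Illustration: for V16QImode/V8HImode above, x >u y has been
+     rewritten as us_minus (x, y) != 0 (the saturating subtraction is
+     zero exactly when x <=u y), expressed as EQ with the select arms
+     swapped via NEGATE.  */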
+
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+
+ ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
+ operands[2-negate]);
+ return true;
+}
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
+ true if we should do zero extension, else sign extension. HIGH_P is
+ true if we want the N/2 high elements, else the low elements. */
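+
+/* A sketch of the approach: the extension half is built by interleaving
+   with either a zero vector (zero extension) or a vector of sign masks
+   computed as the pcmpgt-style compare 0 > x (sign extension).  */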
+
+void
+ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx, rtx);
+ rtx se, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = gen_lowpart (imode, operands[0]);
+
+ if (unsigned_p)
+ se = force_reg (imode, CONST0_RTX (imode));
+ else
+ se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
+
+ emit_insn (unpack (dest, operands[1], se));
+}
+
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with SSE4.1 instructions. */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx);
+ rtx src, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = operands[0];
+ if (high_p)
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ src = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (64)));
+ }
+ else
+ src = operands[1];
+
+ emit_insn (unpack (dest, src));
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* Expand conditional increment or decrement using adc/sbb instructions.
+   The default case using setcc followed by the conditional move can be
+   done by generic code.  */
+int
+ix86_expand_int_addcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx compare_op;
+ rtx val = const0_rtx;
+ bool fpcmp = false;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (operands[3] != const1_rtx
+ && operands[3] != constm1_rtx)
+ return 0;
+ if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ return 0;
+ code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+
+ if (code != LTU)
+ {
+ val = constm1_rtx;
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ PUT_MODE (compare_op, mode);
+
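+  /* Illustration: "if (a < b) x++;" with unsigned operands arrives here
+     as LTU with operands[3] == const1_rtx and lowers to roughly
+	cmp	op1, op0
+	adc	$0, x
+     (an add of zero plus the carry flag).  */
+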
+ /* Construct either adc or sbb insn. */
+ if ((code == LTU) == (operands[3] == constm1_rtx))
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return 1; /* DONE */
+}
+
+
+/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
+   works for floating-point parameters and non-offsettable memories.
+   For pushes, it returns just stack offsets; the values will be saved
+   in the right order.  At most three parts are generated.  */
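+
+/* For example, following the size computation below: on a 32-bit target
+   an XFmode value splits into three SImode parts and a DFmode value
+   into two; on a 64-bit target XFmode and TFmode split into two
+   parts.  */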
+
+static int
+ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
+{
+ int size;
+
+ if (!TARGET_64BIT)
+ size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
+ else
+ size = (GET_MODE_SIZE (mode) + 4) / 8;
+
+ gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
+ gcc_assert (size >= 2 && size <= 3);
+
+  /* Optimize constant pool references to immediates.  This is used by
+     fp moves, which force all constants to memory to allow combining.  */
+ if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
+ {
+ rtx tmp = maybe_get_pool_constant (operand);
+ if (tmp)
+ operand = tmp;
+ }
+
+ if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
+ {
+      /* The only non-offsettable memories we handle are pushes.  */
+ int ok = push_operand (operand, VOIDmode);
+
+ gcc_assert (ok);
+
+ operand = copy_rtx (operand);
+ PUT_MODE (operand, Pmode);
+ parts[0] = parts[1] = parts[2] = operand;
+ return size;
+ }
+
+ if (GET_CODE (operand) == CONST_VECTOR)
+ {
+ enum machine_mode imode = int_mode_for_mode (mode);
+ /* Caution: if we looked through a constant pool memory above,
+ the operand may actually have a different mode now. That's
+ ok, since we want to pun this all the way back to an integer. */
+ operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
+ gcc_assert (operand != NULL);
+ mode = imode;
+ }
+
+ if (!TARGET_64BIT)
+ {
+ if (mode == DImode)
+ split_di (&operand, 1, &parts[0], &parts[1]);
+ else
+ {
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
+ if (size == 3)
+ parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, SImode, 0);
+ parts[0] = operand;
+ parts[1] = adjust_address (operand, SImode, 4);
+ if (size == 3)
+ parts[2] = adjust_address (operand, SImode, 8);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ switch (mode)
+ {
+ case XFmode:
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
+ case DFmode:
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ parts[1] = gen_int_mode (l[1], SImode);
+ parts[0] = gen_int_mode (l[0], SImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (mode == TImode)
+ split_ti (&operand, 1, &parts[0], &parts[1]);
+ if (mode == XFmode || mode == TFmode)
+ {
+ enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, DImode, 0);
+ parts[0] = operand;
+ parts[1] = adjust_address (operand, upper_mode, 8);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ real_to_target (l, &r, mode);
+
+ /* Do not use shift by 32 to avoid warning on 32bit systems. */
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[0]
+ = gen_int_mode
+ ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
+ DImode);
+ else
+ parts[0] = immed_double_const (l[0], l[1], DImode);
+
+ if (upper_mode == SImode)
+ parts[1] = gen_int_mode (l[2], SImode);
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[1]
+ = gen_int_mode
+ ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
+ DImode);
+ else
+ parts[1] = immed_double_const (l[2], l[3], DImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+
+ return size;
+}
+
+/* Emit insns to perform a move or push of DI, DF, and XF values.
+   Operands 2-4 contain the input values in the correct order;
+   operands 5-7 contain the output values.  */
+
+void
+ix86_split_long_move (rtx operands[])
+{
+ rtx part[2][3];
+ int nparts;
+ int push = 0;
+ int collisions = 0;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+  /* The DFmode expanders may ask us to move a double.  For a 64-bit
+     target this is a single move.  By hiding that fact here we simplify
+     the i386.md splitters.  */
+ if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
+ {
+      /* Optimize constant pool references to immediates.  This is used by
+	 fp moves, which force all constants to memory to allow combining.  */
+
+ if (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
+ operands[1] = get_pool_constant (XEXP (operands[1], 0));
+ if (push_operand (operands[0], VOIDmode))
+ {
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], Pmode);
+ }
+ else
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+
+  /* The only non-offsettable memory we handle is a push.  */
+ if (push_operand (operands[0], VOIDmode))
+ push = 1;
+ else
+ gcc_assert (GET_CODE (operands[0]) != MEM
+ || offsettable_memref_p (operands[0]));
+
+ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
+ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
+
+  /* When emitting a push, take care with source operands on the stack.  */
+ if (push && GET_CODE (operands[1]) == MEM
+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ {
+ /* APPLE LOCAL begin 4099768 */
+ if (nparts == 3 && TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
+ part[1][2] = adjust_address (part[1][2], SImode, 4);
+ /* APPLE LOCAL end 4099768 */
+ if (nparts == 3)
+ part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
+ XEXP (part[1][2], 0));
+ part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
+ XEXP (part[1][1], 0));
+ }
+
+  /* We need to do the copy in the right order in case an address
+     register of the source overlaps the destination.  */
+ if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
+ {
+ if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
+ collisions++;
+ if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
+ collisions++;
+ if (nparts == 3
+ && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
+ collisions++;
+
+ /* Collision in the middle part can be handled by reordering. */
+ if (collisions == 1 && nparts == 3
+ && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
+ {
+ rtx tmp;
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+
+      /* If there are more collisions, we can't handle them by
+	 reordering.  Do an lea to the last part and use only one
+	 colliding move.  */
+ else if (collisions > 1)
+ {
+ rtx base;
+
+ collisions = 1;
+
+ base = part[0][nparts - 1];
+
+ /* Handle the case when the last part isn't valid for lea.
+ Happens in 64-bit mode storing the 12-byte XFmode. */
+ if (GET_MODE (base) != Pmode)
+ base = gen_rtx_REG (Pmode, REGNO (base));
+
+ emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+ part[1][0] = replace_equiv_address (part[1][0], base);
+ part[1][1] = replace_equiv_address (part[1][1],
+ plus_constant (base, UNITS_PER_WORD));
+ if (nparts == 3)
+ part[1][2] = replace_equiv_address (part[1][2],
+ plus_constant (base, 8));
+ }
+ }
+
+ if (push)
+ {
+ if (!TARGET_64BIT)
+ {
+ if (nparts == 3)
+ {
+ if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
+ emit_move_insn (part[0][2], part[1][2]);
+ }
+ }
+ else
+ {
+	  /* In 64-bit mode we don't have a 32-bit push available.  If this
+	     is a register, that is OK: we just use the larger counterpart.
+	     We also retype memory; this comes from an attempt to avoid a
+	     REX prefix when moving the second half of a TFmode value.  */
+ if (GET_MODE (part[1][1]) == SImode)
+ {
+ switch (GET_CODE (part[1][1]))
+ {
+ case MEM:
+ part[1][1] = adjust_address (part[1][1], DImode, 0);
+ break;
+
+ case REG:
+ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (GET_MODE (part[1][0]) == SImode)
+ part[1][0] = part[1][1];
+ }
+ }
+ emit_move_insn (part[0][1], part[1][1]);
+ emit_move_insn (part[0][0], part[1][0]);
+ return;
+ }
+
+  /* Choose the correct order so as not to overwrite the source before
+     it is copied.  */
+ if ((REG_P (part[0][0])
+ && REG_P (part[1][1])
+ && (REGNO (part[0][0]) == REGNO (part[1][1])
+ || (nparts == 3
+ && REGNO (part[0][0]) == REGNO (part[1][2]))))
+ || (collisions > 0
+ && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
+ {
+ if (nparts == 3)
+ {
+ operands[2] = part[0][2];
+ operands[3] = part[0][1];
+ operands[4] = part[0][0];
+ operands[5] = part[1][2];
+ operands[6] = part[1][1];
+ operands[7] = part[1][0];
+ }
+ else
+ {
+ operands[2] = part[0][1];
+ operands[3] = part[0][0];
+ operands[5] = part[1][1];
+ operands[6] = part[1][0];
+ }
+ }
+ else
+ {
+ if (nparts == 3)
+ {
+ operands[2] = part[0][0];
+ operands[3] = part[0][1];
+ operands[4] = part[0][2];
+ operands[5] = part[1][0];
+ operands[6] = part[1][1];
+ operands[7] = part[1][2];
+ }
+ else
+ {
+ operands[2] = part[0][0];
+ operands[3] = part[0][1];
+ operands[5] = part[1][0];
+ operands[6] = part[1][1];
+ }
+ }
+
+ /* If optimizing for size, attempt to locally unCSE nonzero constants. */
+ if (optimize_size)
+ {
+ if (GET_CODE (operands[5]) == CONST_INT
+ && operands[5] != const0_rtx
+ && REG_P (operands[2]))
+ {
+ if (GET_CODE (operands[6]) == CONST_INT
+ && INTVAL (operands[6]) == INTVAL (operands[5]))
+ operands[6] = operands[2];
+
+ if (nparts == 3
+ && GET_CODE (operands[7]) == CONST_INT
+ && INTVAL (operands[7]) == INTVAL (operands[5]))
+ operands[7] = operands[2];
+ }
+
+ if (nparts == 3
+ && GET_CODE (operands[6]) == CONST_INT
+ && operands[6] != const0_rtx
+ && REG_P (operands[3])
+ && GET_CODE (operands[7]) == CONST_INT
+ && INTVAL (operands[7]) == INTVAL (operands[6]))
+ operands[7] = operands[3];
+ }
+
+ emit_move_insn (operands[2], operands[5]);
+ emit_move_insn (operands[3], operands[6]);
+ if (nparts == 3)
+ emit_move_insn (operands[4], operands[7]);
+
+ return;
+}
+
+/* Helper function of ix86_split_ashl used to generate an SImode/DImode
+ left shift by a constant, either using a single shift or
+ a sequence of add instructions. */
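+
+/* E.g., a left shift by 2 may become two add instructions when we are
+   not optimizing for size and 2 * ix86_cost->add is at most
+   ix86_cost->shift_const; a shift by 1 is always a single add.  */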
+
+static void
+ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
+{
+ if (count == 1)
+ {
+ emit_insn ((mode == DImode
+ ? gen_addsi3
+ : gen_adddi3) (operand, operand, operand));
+ }
+ else if (!optimize_size
+ && count * ix86_cost->add <= ix86_cost->shift_const)
+ {
+ int i;
+ for (i=0; i<count; i++)
+ {
+ emit_insn ((mode == DImode
+ ? gen_addsi3
+ : gen_adddi3) (operand, operand, operand));
+ }
+ }
+ else
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (operand, operand, GEN_INT (count)));
+}
+
+void
+ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count >= single_width)
+ {
+ emit_move_insn (high[0], low[1]);
+ emit_move_insn (low[0], const0_rtx);
+
+ if (count > single_width)
+ ix86_expand_ashl_const (high[0], count - single_width, mode);
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shld_1
+ : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
+ ix86_expand_ashl_const (low[0], count, mode);
+ }
+ return;
+ }
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ if (operands[1] == const1_rtx)
+ {
+      /* Assuming we've chosen QImode-capable registers, 1 << N can be
+	 done with two 32/64-bit shifts, no branches, no cmoves.  */
+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
+ {
+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+
+ ix86_expand_clear (low[0]);
+ ix86_expand_clear (high[0]);
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
+
+ d = gen_lowpart (QImode, low[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+
+ d = gen_lowpart (QImode, high[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_NE (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ }
+
+ /* Otherwise, we can get the same results by manually performing
+ a bit extract operation on bit 5/6, and then performing the two
+ shifts. The two methods of getting 0/1 into low/high are exactly
+ the same size. Avoiding the shift in the bit extract case helps
+ pentium4 a bit; no one else seems to care much either way. */
+ else
+ {
+ rtx x;
+
+ if (TARGET_PARTIAL_REG_STALL && !optimize_size)
+ x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
+ else
+ x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
+
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
+ emit_insn ((mode == DImode
+ ? gen_andsi3
+ : gen_anddi3) (high[0], high[0], GEN_INT (1)));
+ emit_move_insn (low[0], high[0]);
+ emit_insn ((mode == DImode
+ ? gen_xorsi3
+ : gen_xordi3) (low[0], low[0], GEN_INT (1)));
+ }
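+
+      /* Either way, high[0] now holds bit 5 (resp. 6) of the count
+	 (1 iff the count selects the upper half) and low[0] holds its
+	 complement.  The hardware shifts below mask the count to 5/6
+	 bits, completing 1 << N.  */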
+
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (low[0], low[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (high[0], high[0], operands[2]));
+ return;
+ }
+
+ if (operands[1] == constm1_rtx)
+ {
+      /* For -1 << N, we can avoid the shld instruction, because we know
+	 that we're shifting 0..31/63 one bits into a value that is
+	 already all ones.  */
+ emit_move_insn (low[0], constm1_rtx);
+ if (optimize_size)
+ emit_move_insn (high[0], low[0]);
+ else
+ emit_move_insn (high[0], constm1_rtx);
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+ emit_insn ((mode == DImode
+ ? gen_x86_shld_1
+ : gen_x86_64_shld) (high[0], low[0], operands[2]));
+ }
+
+ emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
+}
+
+void
+ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count == single_width * 2 - 1)
+ {
+ emit_move_insn (high[0], high[1]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0],
+ GEN_INT (single_width - 1)));
+ emit_move_insn (low[0], high[0]);
+
+ }
+ else if (count >= single_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ emit_move_insn (high[0], low[0]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0],
+ GEN_INT (single_width - 1)));
+ if (count > single_width)
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (low[0], low[0],
+ GEN_INT (count - single_width)));
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ emit_move_insn (scratch, high[0]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (scratch, scratch,
+ GEN_INT (single_width - 1)));
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
+ }
+}
+
+void
+ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count >= single_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ ix86_expand_clear (high[0]);
+
+ if (count > single_width)
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (low[0], low[0],
+ GEN_INT (count - single_width)));
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd_1
+ : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], operands[2]));
+
+ /* Heh. By reversing the arguments, we can reuse this pattern. */
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
+ }
+}
+
+/* Helper function for the string operations below.  Test whether
+   VARIABLE is aligned with respect to VALUE (VARIABLE & VALUE == 0);
+   if so, jump to the returned label.  */
+static rtx
+ix86_expand_aligntest (rtx variable, int value)
+{
+ rtx label = gen_label_rtx ();
+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
+ if (GET_MODE (variable) == DImode)
+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
+ else
+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
+ 1, label);
+ return label;
+}
+
+/* Subtract VALUE from COUNTREG.  */
+static void
+ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
+{
+ if (GET_MODE (countreg) == DImode)
+ emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
+ else
+ emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
+}
+
+/* Zero-extend EXP, which may be SImode, into a Pmode register.  */
+rtx
+ix86_zero_extend_to_Pmode (rtx exp)
+{
+ rtx r;
+ if (GET_MODE (exp) == VOIDmode)
+ return force_reg (Pmode, exp);
+ if (GET_MODE (exp) == Pmode)
+ return copy_to_mode_reg (Pmode, exp);
+ r = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendsidi2 (r, exp));
+ return r;
+}
+
+/* Expand string move (memcpy) operation. Use i386 string operations when
+ profitable. expand_clrmem contains similar code. */
+int
+ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
+{
+ rtx srcreg, destreg, countreg, srcexp, destexp;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+ /* Can't use any of this if the user has appropriated esi or edi. */
+ if (global_regs[4] || global_regs[5])
+ return 0;
+
+ /* This simple hack avoids all inlining code and simplifies code below. */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 64;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ {
+ count = INTVAL (count_exp);
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return 0;
+ }
+
+  /* Figure out the proper mode for the counter.  For 32 bits it is
+     always SImode; for 64 bits use SImode when possible, otherwise
+     DImode.  Set count to the number of bytes copied when known at
+     compile time.  */
+ if (!TARGET_64BIT
+ || GET_MODE (count_exp) == SImode
+ || x86_64_zext_immediate_operand (count_exp, VOIDmode))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ gcc_assert (counter_mode == SImode || counter_mode == DImode);
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ if (destreg != XEXP (dst, 0))
+ dst = replace_equiv_address_nv (dst, destreg);
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ if (srcreg != XEXP (src, 0))
+ src = replace_equiv_address_nv (src, srcreg);
+
+  /* When optimizing for size, emit a simple rep ; movsb instruction for
+     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
+     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
+     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
+     count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
+     but we don't know whether the upper 24 (resp. 56) bits of %ecx will
+     be known to be zero or not.  The rep; movsb sequence causes higher
+     register pressure though, so take that into account.  */
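+
+  /* E.g., count == 7 gives movsl; movsw; movsb (4 bytes by the formula
+     above) versus the 4- or 7-byte mov; rep; movsb sequence.  */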
+
+ if ((!optimize || optimize_size)
+ && (count == 0
+ || ((count & 0x03)
+ && (!optimize_size
+ || count > 5 * 4
+ || (count & 3) + count / 4 > 6))))
+ {
+ emit_insn (gen_cld ());
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
+ srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
+ destexp, srcexp));
+ }
+
+ /* For constant aligned (or small unaligned) copies use rep movsl
+ followed by code copying the rest. For PentiumPro ensure 8 byte
+ alignment to allow rep movsl acceleration. */
+
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int) 64))
+ {
+ unsigned HOST_WIDE_INT offset = 0;
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ rtx srcmem, dstmem;
+
+ emit_insn (gen_cld ());
+ if (count & ~(size - 1))
+ {
+ if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
+ {
+ enum machine_mode movs_mode = size == 4 ? SImode : DImode;
+
+ while (offset < (count & ~(size - 1)))
+ {
+ srcmem = adjust_automodify_address_nv (src, movs_mode,
+ srcreg, offset);
+ dstmem = adjust_automodify_address_nv (dst, movs_mode,
+ destreg, offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += size;
+ }
+ }
+ else
+ {
+ countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff));
+ countreg = copy_to_mode_reg (counter_mode, countreg);
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (size == 4 ? 2 : 3));
+ srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+ countreg, destexp, srcexp));
+ offset = count & ~(size - 1);
+ }
+ }
+ if (size == 8 && (count & 0x04))
+ {
+ srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
+ offset);
+ dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
+ offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += 4;
+ }
+ if (count & 0x02)
+ {
+ srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
+ offset);
+ dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
+ offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += 2;
+ }
+ if (count & 0x01)
+ {
+ srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
+ offset);
+ dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
+ offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ }
+ /* The generic code based on the glibc implementation:
+ - align destination to 4 bytes (8 byte alignment is used for PentiumPro
+ allowing accelerated copying there)
+ - copy the data using rep movsl
+ - copy the rest. */
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+ rtx srcmem, dstmem;
+ int desired_alignment = (TARGET_PENTIUMPRO
+ && (count == 0 || count >= (unsigned int) 260)
+ ? 8 : UNITS_PER_WORD);
+ /* Get rid of MEM_OFFSETs, they won't be accurate. */
+ dst = change_address (dst, BLKmode, destreg);
+ src = change_address (src, BLKmode, srcreg);
+
+ /* In case we don't know anything about the alignment, default to
+ library version, since it is usually equally fast and result in
+ shorter code.
+
+ Also emit a call when we know that the count is large and call overhead
+ will not be important. */
+ if (!TARGET_INLINE_ALL_STRINGOPS
+ && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
+ return 0;
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+
+ /* We don't use loops to align destination and to copy parts smaller
+ than 4 bytes, because gcc is able to optimize such code better (in
+ the case the destination or the count really is aligned, gcc is often
+ able to predict the branches) and also it is friendlier to the
+ hardware branch prediction.
+
+ Using loops is beneficial for the generic case, because we can
+ handle small counts using the loops. Many CPUs (such as Athlon)
+ have large REP prefix setup costs.
+
+ This is quite costly. Maybe we can revisit this decision later or
+ add some customizability to this code. */
+
+ if (count == 0 && align < desired_alignment)
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
+ LEU, 0, counter_mode, 1, label);
+ }
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ srcmem = change_address (src, QImode, srcreg);
+ dstmem = change_address (dst, QImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ srcmem = change_address (src, HImode, srcreg);
+ dstmem = change_address (dst, HImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ srcmem = change_address (src, SImode, srcreg);
+ dstmem = change_address (dst, SImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (label && desired_alignment > 4 && !TARGET_64BIT)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL_RTX;
+ }
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
+ }
+ srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+ countreg2, destexp, srcexp));
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ {
+ srcmem = change_address (src, SImode, srcreg);
+ dstmem = change_address (dst, SImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ if ((align <= 4 || count == 0) && TARGET_64BIT)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ srcmem = change_address (src, SImode, srcreg);
+ dstmem = change_address (dst, SImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ {
+ srcmem = change_address (src, HImode, srcreg);
+ dstmem = change_address (dst, HImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ srcmem = change_address (src, HImode, srcreg);
+ dstmem = change_address (dst, HImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ {
+ srcmem = change_address (src, QImode, srcreg);
+ dstmem = change_address (dst, QImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ }
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ srcmem = change_address (src, QImode, srcreg);
+ dstmem = change_address (dst, QImode, destreg);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+
+ return 1;
+}
+
+/* Expand string clear operation (bzero). Use i386 string operations when
+ profitable.  ix86_expand_movmem contains similar code.  */
+int
+ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
+{
+ rtx destreg, zeroreg, countreg, destexp;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+ /* Can't use any of this if the user has appropriated esi. */
+ if (global_regs[4])
+ return 0;
+
+ /* This simple hack avoids all inlining code and simplifies code below. */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 32;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ {
+ count = INTVAL (count_exp);
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return 0;
+ }
+ /* Figure out the proper mode for the counter.  For 32 bits it is
+ always SImode; for 64 bits use SImode when possible, otherwise DImode.
+ Set count to the number of bytes cleared when known at compile time. */
+ if (!TARGET_64BIT
+ || GET_MODE (count_exp) == SImode
+ || x86_64_zext_immediate_operand (count_exp, VOIDmode))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ if (destreg != XEXP (dst, 0))
+ dst = replace_equiv_address_nv (dst, destreg);
+
+ /* When optimizing for size emit a simple rep ; stosb instruction for
+ counts not divisible by 4. The movl $N, %ecx; rep; stosb
+ sequence is 7 bytes long, so if optimizing for size and count is
+ small enough that some stosl, stosw and stosb instructions without
+ rep are shorter, fall back into the next if. */
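+
+ /* For illustration (32-bit encodings):
+ movl $N, %ecx is 5 bytes (b9 imm32)
+ rep ; stosb is 2 bytes (f3 aa)
+ for 7 bytes total, while each bare stosl/stosb is a single byte
+ (ab / aa) and stosw is two (66 ab).  */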
+
+ if ((!optimize || optimize_size)
+ && (count == 0
+ || ((count & 0x03)
+ && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
+ {
+ emit_insn (gen_cld ());
+
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ zeroreg = copy_to_mode_reg (QImode, const0_rtx);
+ destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
+ emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
+ }
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int) 64))
+ {
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ unsigned HOST_WIDE_INT offset = 0;
+
+ emit_insn (gen_cld ());
+
+ zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
+ if (count & ~(size - 1))
+ {
+ unsigned HOST_WIDE_INT repcount;
+ unsigned int max_nonrep;
+
+ repcount = count >> (size == 4 ? 2 : 3);
+ if (!TARGET_64BIT)
+ repcount &= 0x3fffffff;
+
+ /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
+ movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
+ bytes. In both cases the latter seems to be faster for small
+ values of N. */
+ max_nonrep = size == 4 ? 7 : 4;
+ if (!optimize_size)
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_NOCONA:
+ max_nonrep = 3;
+ break;
+ default:
+ break;
+ }
+
+ if (repcount <= max_nonrep)
+ while (repcount-- > 0)
+ {
+ rtx mem = adjust_automodify_address_nv (dst,
+ GET_MODE (zeroreg),
+ destreg, offset);
+ emit_insn (gen_strset (destreg, mem, zeroreg));
+ offset += size;
+ }
+ else
+ {
+ countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (size == 4 ? 2 : 3));
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
+ destexp));
+ offset = count & ~(size - 1);
+ }
+ }
+ if (size == 8 && (count & 0x04))
+ {
+ rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
+ offset);
+ emit_insn (gen_strset (destreg, mem,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ offset += 4;
+ }
+ if (count & 0x02)
+ {
+ rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
+ offset);
+ emit_insn (gen_strset (destreg, mem,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ offset += 2;
+ }
+ if (count & 0x01)
+ {
+ rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
+ offset);
+ emit_insn (gen_strset (destreg, mem,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ }
+ }
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+ /* Compute desired alignment of the string operation. */
+ int desired_alignment = (TARGET_PENTIUMPRO
+ && (count == 0 || count >= (unsigned int) 260)
+ ? 8 : UNITS_PER_WORD);
+
+ /* In case we don't know anything about the alignment, default to
+ the library version, since it is usually equally fast and results in
+ shorter code.
+
+ Also emit a call when we know that the count is large and call overhead
+ will not be important. */
+ if (!TARGET_INLINE_ALL_STRINGOPS
+ && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
+ return 0;
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+ zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
+ /* Get rid of MEM_OFFSET, it won't be accurate. */
+ dst = change_address (dst, BLKmode, destreg);
+
+ if (count == 0 && align < desired_alignment)
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
+ LEU, 0, counter_mode, 1, label);
+ }
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ emit_insn (gen_strset (destreg, dst,
+ (TARGET_64BIT
+ ? gen_rtx_SUBREG (SImode, zeroreg, 0)
+ : zeroreg)));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (label && desired_alignment > 4 && !TARGET_64BIT)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL_RTX;
+ }
+
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
+ destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
+ }
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ if (TARGET_64BIT && (align <= 4 || count == 0))
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ emit_insn (gen_strset (destreg, dst,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+ return 1;
+}
+
+/* Expand strlen. */
+int
+ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
+{
+ rtx addr, scratch1, scratch2, scratch3, scratch4;
+
+ /* The generic case of the strlen expander is long.  Avoid expanding
+ it unless TARGET_INLINE_ALL_STRINGOPS. */
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !TARGET_INLINE_ALL_STRINGOPS
+ && !optimize_size
+ && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
+ return 0;
+
+ addr = force_reg (Pmode, XEXP (src, 0));
+ scratch1 = gen_reg_rtx (Pmode);
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !optimize_size)
+ {
+ /* Well, it seems that some optimizers do not combine a call like
+ foo(strlen(bar), strlen(bar));
+ when the move and the subtraction are done here.  It does calculate
+ the length just once when these instructions are done inside of
+ output_strlen_unroll().  But I think that since &bar[strlen(bar)] is
+ often used and I use one fewer register for the lifetime of
+ output_strlen_unroll(), this is better. */
+
+ emit_move_insn (out, addr);
+
+ ix86_expand_strlensi_unroll_1 (out, src, align);
+
+ /* strlensi_unroll_1 returns the address of the zero at the end of
+ the string, like memchr(), so compute the length by subtracting
+ the start address. */
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3 (out, out, addr));
+ else
+ emit_insn (gen_subsi3 (out, out, addr));
+ }
+ else
+ {
+ rtx unspec;
+ scratch2 = gen_reg_rtx (Pmode);
+ scratch3 = gen_reg_rtx (Pmode);
+ scratch4 = force_reg (Pmode, constm1_rtx);
+
+ emit_move_insn (scratch3, addr);
+ eoschar = force_reg (QImode, eoschar);
+
+ emit_insn (gen_cld ());
+ src = replace_equiv_address_nv (src, scratch3);
+
+ /* If .md starts supporting :P, this can be done in .md. */
+ unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
+ scratch4), UNSPEC_SCAS);
+ emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
+ emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
+ }
+ else
+ {
+ emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
+ emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
+ }
+ }
+ return 1;
+}
+
+/* Expand the appropriate insns for doing strlen if not just doing
+ repnz; scasb
+
+ out = result, initialized with the start address
+ align_rtx = alignment of the address.
+ scratch = scratch register, initialized with the start address when
+ not aligned, otherwise undefined
+
+ This is just the body. It needs the initializations mentioned above and
+ some address computing at the end. These things are done in i386.md. */
+
+static void
+ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
+{
+ int align;
+ rtx tmp;
+ rtx align_2_label = NULL_RTX;
+ rtx align_3_label = NULL_RTX;
+ rtx align_4_label = gen_label_rtx ();
+ rtx end_0_label = gen_label_rtx ();
+ rtx mem;
+ rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx cmp;
+
+ align = 0;
+ if (GET_CODE (align_rtx) == CONST_INT)
+ align = INTVAL (align_rtx);
+
+ /* Loop to check 1..3 bytes for null to get an aligned pointer. */
+
+ /* Is there a known alignment and is it less than 4? */
+ if (align < 4)
+ {
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
+ /* Is there a known alignment and is it not 2? */
+ if (align != 2)
+ {
+ align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
+ align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
+
+ /* Leave just the 3 lower bits. */
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
+ Pmode, 1, align_2_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
+ Pmode, 1, align_3_label);
+ }
+ else
+ {
+ /* Since the alignment is 2, we have to check 2 or 0 bytes;
+ check whether it is aligned to a 4-byte boundary. */
+
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ }
+
+ mem = change_address (src, QImode, out);
+
+ /* Now compare the bytes. */
+
+ /* Compare the first N unaligned bytes on a byte-by-byte basis. */
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
+ QImode, 1, end_0_label);
+
+ /* Increment the address. */
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+
+ /* Not needed with an alignment of 2. */
+ if (align != 2)
+ {
+ emit_label (align_2_label);
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+
+ emit_label (align_3_label);
+ }
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
+ }
+
+ /* Generate a loop to check 4 bytes at a time.  It is not a good idea
+ to align this loop; it only makes the code larger and does not help
+ to speed it up. */
+ emit_label (align_4_label);
+
+ mem = change_address (src, SImode, out);
+ emit_move_insn (scratch, mem);
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
+ else
+ emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
+
+ /* This formula yields a nonzero result iff one of the bytes is zero.
+ This saves three branches inside loop and many cycles. */
+
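+ /* The computed value is (x - 0x01010101) & ~x & 0x80808080, which is
+ nonzero exactly when some byte of x is zero.  Worked example
+ (illustrative): x = 0x41004242 has a zero byte;
+ x - 0x01010101 = 0x3fff4141, ~x = 0xbeffbdbd, and the conjunction
+ masked with 0x80808080 leaves 0x00800000 != 0.  */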
+ emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
+ emit_insn (gen_one_cmplsi2 (scratch, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg,
+ gen_int_mode (0x80808080, SImode)));
+ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
+ align_4_label);
+
+ if (TARGET_CMOVE)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, tmpreg);
+ emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
+
+ /* If zero is not in the first two bytes, move two bytes forward. */
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
+ gen_rtx_IF_THEN_ELSE (SImode, tmp,
+ reg,
+ tmpreg)));
+ /* Emit lea manually to avoid clobbering the flags. */
+ emit_insn (gen_rtx_SET (SImode, reg2,
+ gen_rtx_PLUS (Pmode, out, const2_rtx)));
+
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, out,
+ gen_rtx_IF_THEN_ELSE (Pmode, tmp,
+ reg2,
+ out)));
+
+ }
+ else
+ {
+ rtx end_2_label = gen_label_rtx ();
+ /* Is zero in the first two bytes? */
+
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, end_2_label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = end_2_label;
+
+ /* Not in the first two. Move two bytes forward. */
+ emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const2_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const2_rtx));
+
+ emit_label (end_2_label);
+
+ }
+
+ /* Avoid branch in fixing the byte. */
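+ /* Explanatory note: the low byte of TMPREG has bit 7 set iff the
+ zero byte is the first of the two remaining candidates.  Adding
+ the byte to itself shifts that bit into the carry flag, and the
+ sbb below then subtracts either 3 or 4 from OUT, landing on the
+ terminating zero without a conditional branch.  */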
+ tmpreg = gen_lowpart (QImode, tmpreg);
+ emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
+ else
+ emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
+
+ emit_label (end_0_label);
+}
+
+void
+ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
+ rtx callarg2 ATTRIBUTE_UNUSED,
+ rtx pop, int sibcall)
+{
+ rtx use = NULL, call;
+
+ if (pop == const0_rtx)
+ pop = NULL;
+ gcc_assert (!TARGET_64BIT || !pop);
+
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ fnaddr = machopic_indirect_call_target (fnaddr);
+#endif
+ }
+ else
+ {
+ /* Static functions and indirect calls don't need the pic register. */
+ if (! TARGET_64BIT && flag_pic
+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+ use_reg (&use, pic_offset_table_rtx);
+ }
+
+ if (TARGET_64BIT && INTVAL (callarg2) >= 0)
+ {
+ rtx al = gen_rtx_REG (QImode, 0);
+ emit_move_insn (al, callarg2);
+ use_reg (&use, al);
+ }
+
+ if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
+ if (sibcall && TARGET_64BIT
+ && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ rtx addr;
+ addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
+ emit_move_insn (fnaddr, addr);
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
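+ /* Note: %r11 is used above because it is a call-clobbered register
+ that is not used for argument passing in the 64-bit ABI, so loading
+ the target address into it cannot disturb argument registers that
+ have already been set up on the sibcall path. */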
+
+ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+ if (retval)
+ call = gen_rtx_SET (VOIDmode, retval, call);
+ if (pop)
+ {
+ pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
+ pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ }
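+ /* Explanatory note: for callee-pop conventions (e.g. stdcall on
+ 32-bit targets) the call is wrapped in a PARALLEL whose second
+ element sets sp = sp + N, so the stack adjustment performed by the
+ callee's `ret $N' is visible to the RTL passes. */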
+
+ call = emit_call_insn (call);
+ if (use)
+ CALL_INSN_FUNCTION_USAGE (call) = use;
+}
+
+
+/* Clear stack slot assignments remembered from previous functions.
+ This is called from INIT_EXPANDERS once before RTL is emitted for each
+ function. */
+
+static struct machine_function *
+ix86_init_machine_status (void)
+{
+ struct machine_function *f;
+
+ f = ggc_alloc_cleared (sizeof (struct machine_function));
+ f->use_fast_prologue_epilogue_nregs = -1;
+ f->tls_descriptor_call_expanded_p = 0;
+
+ return f;
+}
+
+/* Return a MEM corresponding to a stack slot with mode MODE.
+ Allocate a new slot if necessary.
+
+ The RTL for a function can have several slots available: N is
+ which slot to use. */
+
+rtx
+assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
+{
+ struct stack_local_entry *s;
+
+ gcc_assert (n < MAX_386_STACK_LOCALS);
+
+ /* Virtual slot is valid only before vregs are instantiated. */
+ gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
+
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->mode == mode && s->n == n)
+ return s->rtl;
+
+ s = (struct stack_local_entry *)
+ ggc_alloc (sizeof (struct stack_local_entry));
+ s->n = n;
+ s->mode = mode;
+ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+
+ s->next = ix86_stack_locals;
+ ix86_stack_locals = s;
+ return s->rtl;
+}
+
+/* Construct the SYMBOL_REF for the tls_get_addr function. */
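+/* (Note: the triple-underscore ___tls_get_addr entry point selected for
+ 32-bit GNU TLS below is assumed to take its argument in %eax rather
+ than on the stack, unlike the standard __tls_get_addr.) */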
+
+static GTY(()) rtx ix86_tls_symbol;
+rtx
+ix86_tls_get_addr (void)
+{
+ if (!ix86_tls_symbol)
+ {
+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ (TARGET_ANY_GNU_TLS
+ && !TARGET_64BIT)
+ ? "___tls_get_addr"
+ : "__tls_get_addr");
+ }
+
+ return ix86_tls_symbol;
+}
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx ix86_tls_module_base_symbol;
+rtx
+ix86_tls_module_base (void)
+{
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
+}
+
+/* Calculate the length of the memory address in the instruction
+ encoding. Does not include the one-byte modrm, opcode, or prefix. */
+
+int
+memory_address_length (rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int len;
+ int ok;
+
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC
+ || GET_CODE (addr) == PRE_MODIFY
+ || GET_CODE (addr) == POST_MODIFY)
+ return 0;
+
+ ok = ix86_decompose_address (addr, &parts);
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ len = 0;
+
+ /* Rule of thumb:
+ - esp as the base always wants an index,
+ - ebp as the base always wants a displacement. */
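+
+ /* For illustration (32-bit encodings; the length excludes the
+ opcode and the first modrm byte):
+ movl (%eax), %ecx 8b 08 -> len 0
+ movl (%esp), %ecx 8b 0c 24 -> len 1 (SIB byte)
+ movl (%ebp), %ecx 8b 4d 00 -> len 1 (disp8 forced)
+ movl 0x1234(%eax), %ecx 8b 88 34 12 00 00 -> len 4 (disp32) */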
+
+ /* Register Indirect. */
+ if (base && !index && !disp)
+ {
+ /* esp (for its index) and ebp (for its displacement) need
+ the two-byte modrm form. */
+ if (addr == stack_pointer_rtx
+ || addr == arg_pointer_rtx
+ || addr == frame_pointer_rtx
+ || addr == hard_frame_pointer_rtx)
+ len = 1;
+ }
+
+ /* Direct Addressing. */
+ else if (disp && !base && !index)
+ len = 4;
+
+ else
+ {
+ /* Find the length of the displacement constant. */
+ if (disp)
+ {
+ if (base && satisfies_constraint_K (disp))
+ len = 1;
+ else
+ len = 4;
+ }
+ /* ebp always wants a displacement. */
+ else if (base == hard_frame_pointer_rtx)
+ len = 1;
+
+ /* An index requires the two-byte modrm form.... */
+ if (index
+ /* ...like esp, which always wants an index. */
+ || base == stack_pointer_rtx
+ || base == arg_pointer_rtx
+ || base == frame_pointer_rtx)
+ len += 1;
+ }
+
+ return len;
+}
+
+/* Compute the default value for the "length_immediate" attribute.  When
+ SHORTFORM is set, expect the insn to have an 8-bit immediate alternative. */
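+/* For illustration: `addl $3, %eax' can use the sign-extended 8-bit
+ form (83 c0 03 -- immediate length 1), while `addl $1000, %eax' needs
+ a full 32-bit immediate (05 e8 03 00 00 -- length 4). */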
+int
+ix86_attr_length_immediate_default (rtx insn, int shortform)
+{
+ int len = 0;
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (CONSTANT_P (recog_data.operand[i]))
+ {
+ gcc_assert (!len);
+ if (shortform && satisfies_constraint_K (recog_data.operand[i]))
+ len = 1;
+ else
+ {
+ switch (get_attr_mode (insn))
+ {
+ case MODE_QI:
+ len += 1;
+ break;
+ case MODE_HI:
+ len += 2;
+ break;
+ case MODE_SI:
+ len += 4;
+ break;
+ /* Immediates for DImode instructions are encoded as
+ 32-bit sign-extended values. */
+ case MODE_DI:
+ len += 4;
+ break;
+ default:
+ fatal_insn ("unknown insn mode", insn);
+ }
+ }
+ }
+ return len;
+}
+
+/* Compute the default value for the "length_address" attribute. */
+int
+ix86_attr_length_address_default (rtx insn)
+{
+ int i;
+
+ if (get_attr_type (insn) == TYPE_LEA)
+ {
+ rtx set = PATTERN (insn);
+
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+
+ gcc_assert (GET_CODE (set) == SET);
+
+ return memory_address_length (SET_SRC (set));
+ }
+
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (GET_CODE (recog_data.operand[i]) == MEM)
+ return memory_address_length (XEXP (recog_data.operand[i], 0));
+ return 0;
+}
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+static int
+ix86_issue_rate (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ case PROCESSOR_K6:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_NOCONA:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ return 3;
+ /* APPLE LOCAL begin mainline */
+ case PROCESSOR_CORE2:
+ return 4;
+ /* APPLE LOCAL end mainline */
+
+ default:
+ return 1;
+ }
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
+ flags set by DEP_INSN and nothing else set by DEP_INSN. */
+
+static int
+ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx set, set2;
+
+ /* Simplify the test for uninteresting insns. */
+ if (insn_type != TYPE_SETCC
+ && insn_type != TYPE_ICMOV
+ && insn_type != TYPE_FCMOV
+ && insn_type != TYPE_IBR)
+ return 0;
+
+ if ((set = single_set (dep_insn)) != 0)
+ {
+ set = SET_DEST (set);
+ set2 = NULL_RTX;
+ }
+ else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
+ && XVECLEN (PATTERN (dep_insn), 0) == 2
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
+ {
+ set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
+ set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
+ }
+ else
+ return 0;
+
+ if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
+ return 0;
+
+ /* This test is true if the dependent insn reads the flags but
+ not any other potentially set register. */
+ if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
+ return 0;
+
+ if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
+ return 0;
+
+ return 1;
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
+ address with operands set by DEP_INSN. */
+
+static int
+ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx addr;
+
+ if (insn_type == TYPE_LEA
+ && TARGET_PENTIUM)
+ {
+ addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ }
+ else
+ {
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (GET_CODE (recog_data.operand[i]) == MEM)
+ {
+ addr = XEXP (recog_data.operand[i], 0);
+ goto found;
+ }
+ return 0;
+ found:;
+ }
+
+ return modified_in_p (addr, dep_insn);
+}
+
+static int
+ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type insn_type, dep_insn_type;
+ enum attr_memory memory;
+ rtx set, set2;
+ int dep_insn_code_number;
+
+ /* Anti and output dependencies have zero cost on all CPUs. */
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+
+ dep_insn_code_number = recog_memoized (dep_insn);
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_insn_type = get_attr_type (dep_insn);
+
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ /* Address Generation Interlock adds a cycle of latency. */
+ if (ix86_agi_dependent (insn, dep_insn, insn_type))
+ cost += 1;
+
+ /* ??? Compares pair with jump/setcc. */
+ if (ix86_flags_dependent (insn, dep_insn, insn_type))
+ cost = 0;
+
+ /* Floating point stores require value to be ready one cycle earlier. */
+ if (insn_type == TYPE_FMOV
+ && get_attr_memory (insn) == MEMORY_STORE
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ cost += 1;
+ break;
+
+ case PROCESSOR_PENTIUMPRO:
+ memory = get_attr_memory (insn);
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* There is one cycle extra latency between an FP op and a store. */
+ if (insn_type == TYPE_FMOV
+ && (set = single_set (dep_insn)) != NULL_RTX
+ && (set2 = single_set (insn)) != NULL_RTX
+ && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
+ && GET_CODE (SET_DEST (set2)) == MEM)
+ cost += 1;
+
+ /* Show the ability of the reorder buffer to hide the latency of a
+ load by executing it in parallel with the previous instruction, in
+ case the previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ /* Claim moves to take one cycle, as the core can issue one load
+ at a time and the next load can start a cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 1)
+ cost--;
+ }
+ break;
+
+ case PROCESSOR_K6:
+ memory = get_attr_memory (insn);
+
+ /* The esp dependency is resolved before the instruction is really
+ finished. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 1;
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* Show the ability of the reorder buffer to hide the latency of a
+ load by executing it in parallel with the previous instruction, in
+ case the previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ /* Claim moves to take one cycle, as the core can issue one load
+ at a time and the next load can start a cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 2)
+ cost -= 2;
+ else
+ cost = 1;
+ }
+ break;
+
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a
+ load by executing it in parallel with the previous instruction, in
+ case the previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ enum attr_unit unit = get_attr_unit (insn);
+ int loadcost = 3;
+
+ /* Because of the difference between the length of integer and
+ floating unit pipeline preparation stages, the memory operands
+ for floating point are cheaper.
+
+ ??? For Athlon the difference is most probably 2. */
+ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+ loadcost = 3;
+ else
+ loadcost = TARGET_ATHLON ? 2 : 0;
+
+ if (cost >= loadcost)
+ cost -= loadcost;
+ else
+ cost = 0;
+ }
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
+/* How many alternative schedules to try. This should be as wide as the
+ scheduling freedom in the DFA, but no wider. Making this value too
+ large results in extra work for the scheduler. */
+
+static int
+ia32_multipass_dfa_lookahead (void)
+{
+ if (ix86_tune == PROCESSOR_PENTIUM)
+ return 2;
+
+ if (ix86_tune == PROCESSOR_PENTIUMPRO
+ || ix86_tune == PROCESSOR_K6)
+ return 1;
+
+ else
+ return 0;
+}
+
+
+/* Compute the alignment given to a constant that is being placed in memory.
+ EXP is the constant and ALIGN is the alignment that the object would
+ ordinarily have.
+ The value of this function is used instead of that alignment to align
+ the object. */
+
+int
+ix86_constant_alignment (tree exp, int align)
+{
+ if (TREE_CODE (exp) == REAL_CST)
+ {
+ if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
+ return 64;
+ else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
+ return 128;
+ }
+ else if (!optimize_size && TREE_CODE (exp) == STRING_CST
+ && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
+ return BITS_PER_WORD;
+
+/* APPLE LOCAL begin 4090661 */
+#if TARGET_MACHO
+ /* Without this, static arrays initialized to strings get aligned
+ to 32 bytes. These go in cstring, so would result in a lot of extra
+ padding in files with a couple of small strings. 4090661. */
+ else if (TREE_CODE (exp) == STRING_CST)
+ {
+ if (TREE_STRING_LENGTH (exp) >= 31 && !optimize_size)
+ return BITS_PER_WORD;
+ else
+ return 8;
+ }
+#endif
+/* APPLE LOCAL end 4090661 */
+ return align;
+}
+
+/* Compute the alignment for a static variable.
+ TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this function is used
+ instead of that alignment to align the object. */
+
+int
+ix86_data_alignment (tree type, int align)
+{
+ int max_align = optimize_size ? BITS_PER_WORD : 256;
+
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
+ && align < max_align)
+ align = max_align;
+
+ /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if (TYPE_MODE (type) == XCmode && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+
+ return align;
+}
+
+/* Compute the alignment for a local variable.
+ TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object. */
+
+int
+ix86_local_alignment (tree type, int align)
+{
+ /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if (TYPE_MODE (type) == XCmode && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+ return align;
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+void
+x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
+{
+ if (!TARGET_64BIT)
+ {
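+ /* Explanatory note: the 32-bit trampoline is 10 bytes:
+ offset 0: b9 <cxt:4> movl $cxt, %ecx
+ offset 5: e9 <disp:4> jmp fnaddr
+ so the jump displacement is computed relative to tramp + 10,
+ the address of the byte following the jmp. */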
+ /* Compute offset from the end of the jmp to the target function. */
+ rtx disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (tramp, 10),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ emit_move_insn (gen_rtx_MEM (QImode, tramp),
+ gen_int_mode (0xb9, QImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
+ gen_int_mode (0xe9, QImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
+ }
+ else
+ {
+ int offset = 0;
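+ /* Explanatory note: the 64-bit trampoline loads the target into
+ %r11 and the static chain into %r10, then jumps through %r11:
+ 41 bb <imm32> movl $fnaddr, %r11d (zero-extended)
+ or
+ 49 bb <imm64> movabs $fnaddr, %r11
+ followed by
+ 49 ba <imm64> movabs $cxt, %r10
+ 49 ff e3 jmpq *%r11 */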
+ /* Try to load address using shorter movl instead of movabs.
+ We may want to support movq for kernel mode, but the kernel does not
+ use trampolines at the moment. */
+ if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
+ {
+ fnaddr = copy_to_mode_reg (DImode, fnaddr);
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xbb41, HImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
+ gen_lowpart (SImode, fnaddr));
+ offset += 6;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xbb49, HImode));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ fnaddr);
+ offset += 10;
+ }
+ /* Load static chain using movabs to r10. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xba49, HImode));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ cxt);
+ offset += 10;
+ /* Jump through %r11. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xff49, HImode));
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
+ gen_int_mode (0xe3, QImode));
+ offset += 3;
+ gcc_assert (offset <= TRAMPOLINE_SIZE);
+ }
+
+#ifdef ENABLE_EXECUTE_STACK
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
+#endif
+}
+
+/* Codes for all the SSE/MMX builtins. */
+enum ix86_builtins
+{
+ IX86_BUILTIN_ADDPS,
+ IX86_BUILTIN_ADDSS,
+ IX86_BUILTIN_DIVPS,
+ IX86_BUILTIN_DIVSS,
+ IX86_BUILTIN_MULPS,
+ IX86_BUILTIN_MULSS,
+ IX86_BUILTIN_SUBPS,
+ IX86_BUILTIN_SUBSS,
+
+ IX86_BUILTIN_CMPEQPS,
+ IX86_BUILTIN_CMPLTPS,
+ IX86_BUILTIN_CMPLEPS,
+ IX86_BUILTIN_CMPGTPS,
+ IX86_BUILTIN_CMPGEPS,
+ IX86_BUILTIN_CMPNEQPS,
+ IX86_BUILTIN_CMPNLTPS,
+ IX86_BUILTIN_CMPNLEPS,
+ IX86_BUILTIN_CMPNGTPS,
+ IX86_BUILTIN_CMPNGEPS,
+ IX86_BUILTIN_CMPORDPS,
+ IX86_BUILTIN_CMPUNORDPS,
+ IX86_BUILTIN_CMPEQSS,
+ IX86_BUILTIN_CMPLTSS,
+ IX86_BUILTIN_CMPLESS,
+ IX86_BUILTIN_CMPNEQSS,
+ IX86_BUILTIN_CMPNLTSS,
+ IX86_BUILTIN_CMPNLESS,
+ IX86_BUILTIN_CMPNGTSS,
+ IX86_BUILTIN_CMPNGESS,
+ IX86_BUILTIN_CMPORDSS,
+ IX86_BUILTIN_CMPUNORDSS,
+
+ IX86_BUILTIN_COMIEQSS,
+ IX86_BUILTIN_COMILTSS,
+ IX86_BUILTIN_COMILESS,
+ IX86_BUILTIN_COMIGTSS,
+ IX86_BUILTIN_COMIGESS,
+ IX86_BUILTIN_COMINEQSS,
+ IX86_BUILTIN_UCOMIEQSS,
+ IX86_BUILTIN_UCOMILTSS,
+ IX86_BUILTIN_UCOMILESS,
+ IX86_BUILTIN_UCOMIGTSS,
+ IX86_BUILTIN_UCOMIGESS,
+ IX86_BUILTIN_UCOMINEQSS,
+
+ IX86_BUILTIN_CVTPI2PS,
+ IX86_BUILTIN_CVTPS2PI,
+ IX86_BUILTIN_CVTSI2SS,
+ IX86_BUILTIN_CVTSI642SS,
+ IX86_BUILTIN_CVTSS2SI,
+ IX86_BUILTIN_CVTSS2SI64,
+ IX86_BUILTIN_CVTTPS2PI,
+ IX86_BUILTIN_CVTTSS2SI,
+ IX86_BUILTIN_CVTTSS2SI64,
+
+ IX86_BUILTIN_MAXPS,
+ IX86_BUILTIN_MAXSS,
+ IX86_BUILTIN_MINPS,
+ IX86_BUILTIN_MINSS,
+
+ IX86_BUILTIN_LOADUPS,
+ IX86_BUILTIN_STOREUPS,
+ IX86_BUILTIN_MOVSS,
+
+ IX86_BUILTIN_MOVHLPS,
+ IX86_BUILTIN_MOVLHPS,
+ IX86_BUILTIN_LOADHPS,
+ IX86_BUILTIN_LOADLPS,
+ IX86_BUILTIN_STOREHPS,
+ IX86_BUILTIN_STORELPS,
+
+ IX86_BUILTIN_MASKMOVQ,
+ IX86_BUILTIN_MOVMSKPS,
+ IX86_BUILTIN_PMOVMSKB,
+
+ IX86_BUILTIN_MOVNTPS,
+ IX86_BUILTIN_MOVNTQ,
+
+ IX86_BUILTIN_LOADDQU,
+ IX86_BUILTIN_STOREDQU,
+
+ IX86_BUILTIN_PACKSSWB,
+ IX86_BUILTIN_PACKSSDW,
+ IX86_BUILTIN_PACKUSWB,
+
+ IX86_BUILTIN_PADDB,
+ IX86_BUILTIN_PADDW,
+ IX86_BUILTIN_PADDD,
+ IX86_BUILTIN_PADDQ,
+ IX86_BUILTIN_PADDSB,
+ IX86_BUILTIN_PADDSW,
+ IX86_BUILTIN_PADDUSB,
+ IX86_BUILTIN_PADDUSW,
+ IX86_BUILTIN_PSUBB,
+ IX86_BUILTIN_PSUBW,
+ IX86_BUILTIN_PSUBD,
+ IX86_BUILTIN_PSUBQ,
+ IX86_BUILTIN_PSUBSB,
+ IX86_BUILTIN_PSUBSW,
+ IX86_BUILTIN_PSUBUSB,
+ IX86_BUILTIN_PSUBUSW,
+
+ IX86_BUILTIN_PAND,
+ IX86_BUILTIN_PANDN,
+ IX86_BUILTIN_POR,
+ IX86_BUILTIN_PXOR,
+
+ IX86_BUILTIN_PAVGB,
+ IX86_BUILTIN_PAVGW,
+
+ IX86_BUILTIN_PCMPEQB,
+ IX86_BUILTIN_PCMPEQW,
+ IX86_BUILTIN_PCMPEQD,
+ IX86_BUILTIN_PCMPGTB,
+ IX86_BUILTIN_PCMPGTW,
+ IX86_BUILTIN_PCMPGTD,
+
+ IX86_BUILTIN_PMADDWD,
+
+ IX86_BUILTIN_PMAXSW,
+ IX86_BUILTIN_PMAXUB,
+ IX86_BUILTIN_PMINSW,
+ IX86_BUILTIN_PMINUB,
+
+ IX86_BUILTIN_PMULHUW,
+ IX86_BUILTIN_PMULHW,
+ IX86_BUILTIN_PMULLW,
+
+ IX86_BUILTIN_PSADBW,
+ IX86_BUILTIN_PSHUFW,
+
+ IX86_BUILTIN_PSLLW,
+ IX86_BUILTIN_PSLLD,
+ IX86_BUILTIN_PSLLQ,
+ IX86_BUILTIN_PSRAW,
+ IX86_BUILTIN_PSRAD,
+ IX86_BUILTIN_PSRLW,
+ IX86_BUILTIN_PSRLD,
+ IX86_BUILTIN_PSRLQ,
+ IX86_BUILTIN_PSLLWI,
+ IX86_BUILTIN_PSLLDI,
+ IX86_BUILTIN_PSLLQI,
+ IX86_BUILTIN_PSRAWI,
+ IX86_BUILTIN_PSRADI,
+ IX86_BUILTIN_PSRLWI,
+ IX86_BUILTIN_PSRLDI,
+ IX86_BUILTIN_PSRLQI,
+
+ IX86_BUILTIN_PUNPCKHBW,
+ IX86_BUILTIN_PUNPCKHWD,
+ IX86_BUILTIN_PUNPCKHDQ,
+ IX86_BUILTIN_PUNPCKLBW,
+ IX86_BUILTIN_PUNPCKLWD,
+ IX86_BUILTIN_PUNPCKLDQ,
+
+ IX86_BUILTIN_SHUFPS,
+
+ IX86_BUILTIN_RCPPS,
+ IX86_BUILTIN_RCPSS,
+ IX86_BUILTIN_RSQRTPS,
+ IX86_BUILTIN_RSQRTSS,
+ IX86_BUILTIN_SQRTPS,
+ IX86_BUILTIN_SQRTSS,
+
+ IX86_BUILTIN_UNPCKHPS,
+ IX86_BUILTIN_UNPCKLPS,
+
+ IX86_BUILTIN_ANDPS,
+ IX86_BUILTIN_ANDNPS,
+ IX86_BUILTIN_ORPS,
+ IX86_BUILTIN_XORPS,
+
+ IX86_BUILTIN_EMMS,
+ IX86_BUILTIN_LDMXCSR,
+ IX86_BUILTIN_STMXCSR,
+ IX86_BUILTIN_SFENCE,
+
+ /* 3DNow! Original */
+ IX86_BUILTIN_FEMMS,
+ IX86_BUILTIN_PAVGUSB,
+ IX86_BUILTIN_PF2ID,
+ IX86_BUILTIN_PFACC,
+ IX86_BUILTIN_PFADD,
+ IX86_BUILTIN_PFCMPEQ,
+ IX86_BUILTIN_PFCMPGE,
+ IX86_BUILTIN_PFCMPGT,
+ IX86_BUILTIN_PFMAX,
+ IX86_BUILTIN_PFMIN,
+ IX86_BUILTIN_PFMUL,
+ IX86_BUILTIN_PFRCP,
+ IX86_BUILTIN_PFRCPIT1,
+ IX86_BUILTIN_PFRCPIT2,
+ IX86_BUILTIN_PFRSQIT1,
+ IX86_BUILTIN_PFRSQRT,
+ IX86_BUILTIN_PFSUB,
+ IX86_BUILTIN_PFSUBR,
+ IX86_BUILTIN_PI2FD,
+ IX86_BUILTIN_PMULHRW,
+
+ /* 3DNow! Athlon Extensions */
+ IX86_BUILTIN_PF2IW,
+ IX86_BUILTIN_PFNACC,
+ IX86_BUILTIN_PFPNACC,
+ IX86_BUILTIN_PI2FW,
+ IX86_BUILTIN_PSWAPDSI,
+ IX86_BUILTIN_PSWAPDSF,
+
+ /* SSE2 */
+ IX86_BUILTIN_ADDPD,
+ IX86_BUILTIN_ADDSD,
+ IX86_BUILTIN_DIVPD,
+ IX86_BUILTIN_DIVSD,
+ IX86_BUILTIN_MULPD,
+ IX86_BUILTIN_MULSD,
+ IX86_BUILTIN_SUBPD,
+ IX86_BUILTIN_SUBSD,
+
+ IX86_BUILTIN_CMPEQPD,
+ IX86_BUILTIN_CMPLTPD,
+ IX86_BUILTIN_CMPLEPD,
+ IX86_BUILTIN_CMPGTPD,
+ IX86_BUILTIN_CMPGEPD,
+ IX86_BUILTIN_CMPNEQPD,
+ IX86_BUILTIN_CMPNLTPD,
+ IX86_BUILTIN_CMPNLEPD,
+ IX86_BUILTIN_CMPNGTPD,
+ IX86_BUILTIN_CMPNGEPD,
+ IX86_BUILTIN_CMPORDPD,
+ IX86_BUILTIN_CMPUNORDPD,
+ IX86_BUILTIN_CMPNEPD,
+ IX86_BUILTIN_CMPEQSD,
+ IX86_BUILTIN_CMPLTSD,
+ IX86_BUILTIN_CMPLESD,
+ IX86_BUILTIN_CMPNEQSD,
+ IX86_BUILTIN_CMPNLTSD,
+ IX86_BUILTIN_CMPNLESD,
+ IX86_BUILTIN_CMPORDSD,
+ IX86_BUILTIN_CMPUNORDSD,
+ IX86_BUILTIN_CMPNESD,
+
+ IX86_BUILTIN_COMIEQSD,
+ IX86_BUILTIN_COMILTSD,
+ IX86_BUILTIN_COMILESD,
+ IX86_BUILTIN_COMIGTSD,
+ IX86_BUILTIN_COMIGESD,
+ IX86_BUILTIN_COMINEQSD,
+ IX86_BUILTIN_UCOMIEQSD,
+ IX86_BUILTIN_UCOMILTSD,
+ IX86_BUILTIN_UCOMILESD,
+ IX86_BUILTIN_UCOMIGTSD,
+ IX86_BUILTIN_UCOMIGESD,
+ IX86_BUILTIN_UCOMINEQSD,
+
+ IX86_BUILTIN_MAXPD,
+ IX86_BUILTIN_MAXSD,
+ IX86_BUILTIN_MINPD,
+ IX86_BUILTIN_MINSD,
+
+ IX86_BUILTIN_ANDPD,
+ IX86_BUILTIN_ANDNPD,
+ IX86_BUILTIN_ORPD,
+ IX86_BUILTIN_XORPD,
+
+ IX86_BUILTIN_SQRTPD,
+ IX86_BUILTIN_SQRTSD,
+
+ IX86_BUILTIN_UNPCKHPD,
+ IX86_BUILTIN_UNPCKLPD,
+
+ IX86_BUILTIN_SHUFPD,
+
+ IX86_BUILTIN_LOADUPD,
+ IX86_BUILTIN_STOREUPD,
+ IX86_BUILTIN_MOVSD,
+
+ IX86_BUILTIN_LOADHPD,
+ IX86_BUILTIN_LOADLPD,
+
+ IX86_BUILTIN_CVTDQ2PD,
+ IX86_BUILTIN_CVTDQ2PS,
+
+ IX86_BUILTIN_CVTPD2DQ,
+ IX86_BUILTIN_CVTPD2PI,
+ IX86_BUILTIN_CVTPD2PS,
+ IX86_BUILTIN_CVTTPD2DQ,
+ IX86_BUILTIN_CVTTPD2PI,
+
+ IX86_BUILTIN_CVTPI2PD,
+ IX86_BUILTIN_CVTSI2SD,
+ IX86_BUILTIN_CVTSI642SD,
+
+ IX86_BUILTIN_CVTSD2SI,
+ IX86_BUILTIN_CVTSD2SI64,
+ IX86_BUILTIN_CVTSD2SS,
+ IX86_BUILTIN_CVTSS2SD,
+ IX86_BUILTIN_CVTTSD2SI,
+ IX86_BUILTIN_CVTTSD2SI64,
+
+ IX86_BUILTIN_CVTPS2DQ,
+ IX86_BUILTIN_CVTPS2PD,
+ IX86_BUILTIN_CVTTPS2DQ,
+
+ IX86_BUILTIN_MOVNTI,
+ IX86_BUILTIN_MOVNTPD,
+ IX86_BUILTIN_MOVNTDQ,
+
+ /* SSE2 MMX */
+ IX86_BUILTIN_MASKMOVDQU,
+ IX86_BUILTIN_MOVMSKPD,
+ IX86_BUILTIN_PMOVMSKB128,
+
+ /* APPLE LOCAL begin 4099020 */
+ IX86_BUILTIN_MOVQ,
+ IX86_BUILTIN_LOADQ,
+ IX86_BUILTIN_STOREQ,
+ /* APPLE LOCAL end 4099020 */
+
+ IX86_BUILTIN_PACKSSWB128,
+ IX86_BUILTIN_PACKSSDW128,
+ IX86_BUILTIN_PACKUSWB128,
+
+ IX86_BUILTIN_PADDB128,
+ IX86_BUILTIN_PADDW128,
+ IX86_BUILTIN_PADDD128,
+ IX86_BUILTIN_PADDQ128,
+ IX86_BUILTIN_PADDSB128,
+ IX86_BUILTIN_PADDSW128,
+ IX86_BUILTIN_PADDUSB128,
+ IX86_BUILTIN_PADDUSW128,
+ IX86_BUILTIN_PSUBB128,
+ IX86_BUILTIN_PSUBW128,
+ IX86_BUILTIN_PSUBD128,
+ IX86_BUILTIN_PSUBQ128,
+ IX86_BUILTIN_PSUBSB128,
+ IX86_BUILTIN_PSUBSW128,
+ IX86_BUILTIN_PSUBUSB128,
+ IX86_BUILTIN_PSUBUSW128,
+
+ IX86_BUILTIN_PAND128,
+ IX86_BUILTIN_PANDN128,
+ IX86_BUILTIN_POR128,
+ IX86_BUILTIN_PXOR128,
+
+ IX86_BUILTIN_PAVGB128,
+ IX86_BUILTIN_PAVGW128,
+
+ IX86_BUILTIN_PCMPEQB128,
+ IX86_BUILTIN_PCMPEQW128,
+ IX86_BUILTIN_PCMPEQD128,
+ IX86_BUILTIN_PCMPGTB128,
+ IX86_BUILTIN_PCMPGTW128,
+ IX86_BUILTIN_PCMPGTD128,
+
+ IX86_BUILTIN_PMADDWD128,
+
+ IX86_BUILTIN_PMAXSW128,
+ IX86_BUILTIN_PMAXUB128,
+ IX86_BUILTIN_PMINSW128,
+ IX86_BUILTIN_PMINUB128,
+
+ IX86_BUILTIN_PMULUDQ,
+ IX86_BUILTIN_PMULUDQ128,
+ IX86_BUILTIN_PMULHUW128,
+ IX86_BUILTIN_PMULHW128,
+ IX86_BUILTIN_PMULLW128,
+
+ IX86_BUILTIN_PSADBW128,
+ IX86_BUILTIN_PSHUFHW,
+ IX86_BUILTIN_PSHUFLW,
+ IX86_BUILTIN_PSHUFD,
+
+ IX86_BUILTIN_PSLLW128,
+ IX86_BUILTIN_PSLLD128,
+ IX86_BUILTIN_PSLLQ128,
+ IX86_BUILTIN_PSRAW128,
+ IX86_BUILTIN_PSRAD128,
+ IX86_BUILTIN_PSRLW128,
+ IX86_BUILTIN_PSRLD128,
+ IX86_BUILTIN_PSRLQ128,
+ IX86_BUILTIN_PSLLDQI128,
+ /* APPLE LOCAL 591583 */
+ IX86_BUILTIN_PSLLDQI128_BYTESHIFT,
+ IX86_BUILTIN_PSLLWI128,
+ IX86_BUILTIN_PSLLDI128,
+ IX86_BUILTIN_PSLLQI128,
+ IX86_BUILTIN_PSRAWI128,
+ IX86_BUILTIN_PSRADI128,
+ IX86_BUILTIN_PSRLDQI128,
+ /* APPLE LOCAL 591583 */
+ IX86_BUILTIN_PSRLDQI128_BYTESHIFT,
+ IX86_BUILTIN_PSRLWI128,
+ IX86_BUILTIN_PSRLDI128,
+ IX86_BUILTIN_PSRLQI128,
+
+ IX86_BUILTIN_PUNPCKHBW128,
+ IX86_BUILTIN_PUNPCKHWD128,
+ IX86_BUILTIN_PUNPCKHDQ128,
+ IX86_BUILTIN_PUNPCKHQDQ128,
+ IX86_BUILTIN_PUNPCKLBW128,
+ IX86_BUILTIN_PUNPCKLWD128,
+ IX86_BUILTIN_PUNPCKLDQ128,
+ IX86_BUILTIN_PUNPCKLQDQ128,
+
+ IX86_BUILTIN_CLFLUSH,
+ IX86_BUILTIN_MFENCE,
+ IX86_BUILTIN_LFENCE,
+
+ /* Prescott New Instructions. */
+ IX86_BUILTIN_ADDSUBPS,
+ IX86_BUILTIN_HADDPS,
+ IX86_BUILTIN_HSUBPS,
+ IX86_BUILTIN_MOVSHDUP,
+ IX86_BUILTIN_MOVSLDUP,
+ IX86_BUILTIN_ADDSUBPD,
+ IX86_BUILTIN_HADDPD,
+ IX86_BUILTIN_HSUBPD,
+ IX86_BUILTIN_LDDQU,
+
+ IX86_BUILTIN_MONITOR,
+ IX86_BUILTIN_MWAIT,
+ /* APPLE LOCAL begin mainline */
+ /* Merom New Instructions. */
+ IX86_BUILTIN_PHADDW,
+ IX86_BUILTIN_PHADDD,
+ IX86_BUILTIN_PHADDSW,
+ IX86_BUILTIN_PHSUBW,
+ IX86_BUILTIN_PHSUBD,
+ IX86_BUILTIN_PHSUBSW,
+ IX86_BUILTIN_PMADDUBSW,
+ IX86_BUILTIN_PMULHRSW,
+ IX86_BUILTIN_PSHUFB,
+ IX86_BUILTIN_PSIGNB,
+ IX86_BUILTIN_PSIGNW,
+ IX86_BUILTIN_PSIGND,
+ IX86_BUILTIN_PALIGNR,
+ IX86_BUILTIN_PABSB,
+ IX86_BUILTIN_PABSW,
+ IX86_BUILTIN_PABSD,
+
+ IX86_BUILTIN_PHADDW128,
+ IX86_BUILTIN_PHADDD128,
+ IX86_BUILTIN_PHADDSW128,
+ IX86_BUILTIN_PHSUBW128,
+ IX86_BUILTIN_PHSUBD128,
+ IX86_BUILTIN_PHSUBSW128,
+ IX86_BUILTIN_PMADDUBSW128,
+ IX86_BUILTIN_PMULHRSW128,
+ IX86_BUILTIN_PSHUFB128,
+ IX86_BUILTIN_PSIGNB128,
+ IX86_BUILTIN_PSIGNW128,
+ IX86_BUILTIN_PSIGND128,
+ IX86_BUILTIN_PALIGNR128,
+ IX86_BUILTIN_PABSB128,
+ IX86_BUILTIN_PABSW128,
+ IX86_BUILTIN_PABSD128,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* AMDFAM10 - SSE4A New Instructions. */
+ IX86_BUILTIN_MOVNTSD,
+ IX86_BUILTIN_MOVNTSS,
+ IX86_BUILTIN_EXTRQI,
+ IX86_BUILTIN_EXTRQ,
+ IX86_BUILTIN_INSERTQI,
+ IX86_BUILTIN_INSERTQ,
+
+ /* SSE4.1. */
+ IX86_BUILTIN_BLENDPD,
+ IX86_BUILTIN_BLENDPS,
+ IX86_BUILTIN_BLENDVPD,
+ IX86_BUILTIN_BLENDVPS,
+ IX86_BUILTIN_PBLENDVB128,
+ IX86_BUILTIN_PBLENDW128,
+
+ IX86_BUILTIN_DPPD,
+ IX86_BUILTIN_DPPS,
+
+ IX86_BUILTIN_INSERTPS128,
+
+ IX86_BUILTIN_MOVNTDQA,
+ IX86_BUILTIN_MPSADBW128,
+ IX86_BUILTIN_PACKUSDW128,
+ IX86_BUILTIN_PCMPEQQ,
+ IX86_BUILTIN_PHMINPOSUW128,
+
+ IX86_BUILTIN_PMAXSB128,
+ IX86_BUILTIN_PMAXSD128,
+ IX86_BUILTIN_PMAXUD128,
+ IX86_BUILTIN_PMAXUW128,
+
+ IX86_BUILTIN_PMINSB128,
+ IX86_BUILTIN_PMINSD128,
+ IX86_BUILTIN_PMINUD128,
+ IX86_BUILTIN_PMINUW128,
+
+ IX86_BUILTIN_PMOVSXBW128,
+ IX86_BUILTIN_PMOVSXBD128,
+ IX86_BUILTIN_PMOVSXBQ128,
+ IX86_BUILTIN_PMOVSXWD128,
+ IX86_BUILTIN_PMOVSXWQ128,
+ IX86_BUILTIN_PMOVSXDQ128,
+
+ IX86_BUILTIN_PMOVZXBW128,
+ IX86_BUILTIN_PMOVZXBD128,
+ IX86_BUILTIN_PMOVZXBQ128,
+ IX86_BUILTIN_PMOVZXWD128,
+ IX86_BUILTIN_PMOVZXWQ128,
+ IX86_BUILTIN_PMOVZXDQ128,
+
+ IX86_BUILTIN_PMULDQ128,
+ IX86_BUILTIN_PMULLD128,
+
+ IX86_BUILTIN_ROUNDPD,
+ IX86_BUILTIN_ROUNDPS,
+ IX86_BUILTIN_ROUNDSD,
+ IX86_BUILTIN_ROUNDSS,
+
+ IX86_BUILTIN_PTESTZ,
+ IX86_BUILTIN_PTESTC,
+ IX86_BUILTIN_PTESTNZC,
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* APPLE LOCAL end mainline */
+ IX86_BUILTIN_VEC_INIT_V2SI,
+ IX86_BUILTIN_VEC_INIT_V4HI,
+ IX86_BUILTIN_VEC_INIT_V8QI,
+ IX86_BUILTIN_VEC_EXT_V2DF,
+ IX86_BUILTIN_VEC_EXT_V2DI,
+ IX86_BUILTIN_VEC_EXT_V4SF,
+ IX86_BUILTIN_VEC_EXT_V4SI,
+ IX86_BUILTIN_VEC_EXT_V8HI,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* deletion */
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_EXT_V2SI,
+ IX86_BUILTIN_VEC_EXT_V4HI,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_EXT_V16QI,
+ IX86_BUILTIN_VEC_SET_V2DI,
+ IX86_BUILTIN_VEC_SET_V4SF,
+ IX86_BUILTIN_VEC_SET_V4SI,
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_SET_V8HI,
+ IX86_BUILTIN_VEC_SET_V4HI,
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ IX86_BUILTIN_VEC_SET_V16QI,
+
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
+ /* SSE4.2. */
+ IX86_BUILTIN_CRC32QI,
+ IX86_BUILTIN_CRC32HI,
+ IX86_BUILTIN_CRC32SI,
+ IX86_BUILTIN_CRC32DI,
+
+ IX86_BUILTIN_PCMPESTRI128,
+ IX86_BUILTIN_PCMPESTRM128,
+ IX86_BUILTIN_PCMPESTRA128,
+ IX86_BUILTIN_PCMPESTRC128,
+ IX86_BUILTIN_PCMPESTRO128,
+ IX86_BUILTIN_PCMPESTRS128,
+ IX86_BUILTIN_PCMPESTRZ128,
+ IX86_BUILTIN_PCMPISTRI128,
+ IX86_BUILTIN_PCMPISTRM128,
+ IX86_BUILTIN_PCMPISTRA128,
+ IX86_BUILTIN_PCMPISTRC128,
+ IX86_BUILTIN_PCMPISTRO128,
+ IX86_BUILTIN_PCMPISTRS128,
+ IX86_BUILTIN_PCMPISTRZ128,
+
+ IX86_BUILTIN_PCMPGTQ,
+
+ /* TFmode support builtins. */
+ IX86_BUILTIN_INFQ,
+ IX86_BUILTIN_FABSQ,
+ IX86_BUILTIN_COPYSIGNQ,
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ IX86_BUILTIN_MAX
+};
+
+#define def_builtin(MASK, NAME, TYPE, CODE) \
+do { \
+ if ((MASK) & target_flags \
+ && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
+ lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
+ NULL, NULL_TREE); \
+} while (0)
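+
+/* For example (illustrative -- the function-type node name is assumed),
+ an SSE builtin would be registered as
+
+ def_builtin (MASK_SSE, "__builtin_ia32_addps",
+ v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
+
+ making it available only when SSE code generation is enabled. */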
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Like def_builtin, but also marks the function decl "const". */
+
+static inline tree
+def_builtin_const (int mask, const char *name, tree type,
+ enum ix86_builtins code)
+{
+ tree decl = NULL_TREE;
+ if ((mask) & target_flags
+ && (!((mask) & MASK_64BIT) || TARGET_64BIT))
+ decl = lang_hooks.builtin_function (name, type, code, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
+ if (decl)
+ TREE_READONLY (decl) = 1;
+ return decl;
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* Bits for builtin_description.flag. */
+
+/* Set when we don't support the comparison natively, and should
+ swap_comparison in order to support it. */
+#define BUILTIN_DESC_SWAP_OPERANDS 1
+
+struct builtin_description
+{
+ const unsigned int mask;
+ const enum insn_code icode;
+ const char *const name;
+ const enum ix86_builtins code;
+ const enum rtx_code comparison;
+ const unsigned int flag;
+};
+
+/* APPLE LOCAL begin 4299257 */
+static const struct builtin_description bdesc_comi[] =
+{
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
+ { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
+};
+static const struct builtin_description bdesc_ucomi[] =
+{
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
+ { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
+};
+/* APPLE LOCAL end 4299257 */
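+/* Both tables are registered by the comi/ucomi loops in
+   ix86_init_mmx_sse_builtins below: MASK_SSE2 rows get the
+   int (v2df, v2df) signature, MASK_SSE rows get int (v4sf, v4sf).  */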
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+static const struct builtin_description bdesc_ptest[] =
+{
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
+};
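+/* The comparison codes above select which flag the expander tests after
+   PTEST: ptestz tests ZF via EQ (ZF is set when (a & b) == 0), ptestc
+   tests CF via LTU (CF is set when the AND-NOT result is all zeros),
+   and ptestnzc tests "CF == 0 && ZF == 0" via GTU.  */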
+
+static const struct builtin_description bdesc_pcmpestr[] =
+{
+ /* SSE4.2 */
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_pcmpistr[] =
+{
+ /* SSE4.2 */
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
+ { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_crc32[] =
+{
+ /* SSE4.2 */
+  { MASK_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
+  { MASK_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
+  { MASK_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
+  { MASK_SSE4_2 | MASK_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
+};
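+/* The crc32 rows are nameless; the corresponding __builtin_ia32_crc32*
+   functions are expected to be declared individually, with their scalar
+   argument types, later in this file.  Only the DI variant needs
+   MASK_64BIT, since a 64-bit crc32 operand requires REX.W.  */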
+
+/* SSE builtins with 3 arguments; the last argument must be an immediate or xmm0.  */
+static const struct builtin_description bdesc_sse_3arg[] =
+{
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
+};
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+static const struct builtin_description bdesc_2arg[] =
+{
+ /* SSE */
+ { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
+
+ { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
+
+ /* MMX */
+ { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ { MASK_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
+
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
+
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
+
+ /* Special. */
+ { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+ { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv4hi2si, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv2si2si, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv1di3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashlv1di2si, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv4hi2si, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv2si2si, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv1di3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_lshrv1di2si, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
+
+ { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashrv4hi2si, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_ashrv2si2si, 0, IX86_BUILTIN_PSRADI, 0, 0 },
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
+
+ /* SSE2 */
+ { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
+
+ { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
+
+ /* SSE2 MMX */
+ { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
+
+  { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
+
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+ { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
+
+ /* SSE3 MMX */
+ { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
+ /* APPLE LOCAL begin mainline */
+ { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
+
+ /* SSSE3 MMX */
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
+
+ /* SSE4.2 */
+ { MASK_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+};
+
+static const struct builtin_description bdesc_1arg[] =
+{
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
+
+ { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+ { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
+ { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+ { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+ { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+ { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
+
+ { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
+
+ /* SSE3 */
+ { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
+ { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
+ /* APPLE LOCAL begin mainline */
+
+ /* SSSE3 */
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* SSE4.1 */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
+
+  /* Fake 1-argument builtins that take a constant smaller than 8 bits as
+     the second argument.  */
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
+ { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+};
+
+static void
+ix86_init_builtins (void)
+{
+ if (TARGET_MMX)
+ ix86_init_mmx_sse_builtins ();
+
+ /* APPLE LOCAL begin constant cfstrings */
+#ifdef SUBTARGET_INIT_BUILTINS
+ SUBTARGET_INIT_BUILTINS;
+#endif
+ /* APPLE LOCAL end constant cfstrings */
+}
+
+/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
+   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
+   are defined (def_builtin checks each entry's target mask).  */
+static void
+ix86_init_mmx_sse_builtins (void)
+{
+ const struct builtin_description * d;
+ size_t i;
+
+ tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
+ tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
+ tree V2DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
+ tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
+ tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
+ tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
+ tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+ tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
+ tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ tree V1DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
+
+ tree pchar_type_node = build_pointer_type (char_type_node);
+ tree pcchar_type_node = build_pointer_type (
+ build_type_variant (char_type_node, 1, 0));
+ tree pfloat_type_node = build_pointer_type (float_type_node);
+ tree pcfloat_type_node = build_pointer_type (
+ build_type_variant (float_type_node, 1, 0));
+ tree pv2si_type_node = build_pointer_type (V2SI_type_node);
+ tree pv2di_type_node = build_pointer_type (V2DI_type_node);
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ tree pv1di_type_node = build_pointer_type (V1DI_type_node);
+
+ /* Comparisons. */
+ tree int_ftype_v4sf_v4sf
+ = build_function_type_list (integer_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v4si_ftype_v4sf_v4sf
+ = build_function_type_list (V4SI_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ /* MMX/SSE/integer conversions. */
+ tree int_ftype_v4sf
+ = build_function_type_list (integer_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int64_ftype_v4sf
+ = build_function_type_list (long_long_integer_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int_ftype_v8qi
+ = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, integer_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int64
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, long_long_integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v2si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V2SI_type_node, NULL_TREE);
+
+ /* Miscellaneous. */
+ tree v8qi_ftype_v4hi_v4hi
+ = build_function_type_list (V8QI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v4hi_ftype_v2si_v2si
+ = build_function_type_list (V4HI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ tree v2si_ftype_v4hi_v4hi
+ = build_function_type_list (V2SI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_int
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree v4hi_ftype_v4hi_v1di
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, V1DI_type_node,
+ NULL_TREE);
+ tree v2si_ftype_v2si_int
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, integer_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_v1di
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, V1DI_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ tree void_ftype_void
+ = build_function_type (void_type_node, void_list_node);
+ tree void_ftype_unsigned
+ = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
+ tree void_ftype_unsigned_unsigned
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree void_ftype_pcvoid_unsigned_unsigned
+ = build_function_type_list (void_type_node, const_ptr_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_void
+ = build_function_type (unsigned_type_node, void_list_node);
+ tree v2si_ftype_v4sf
+ = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
+ /* Loads/stores. */
+ tree void_ftype_v8qi_v8qi_pchar
+ = build_function_type_list (void_type_node,
+ V8QI_type_node, V8QI_type_node,
+ pchar_type_node, NULL_TREE);
+ tree v4sf_ftype_pcfloat
+ = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
+ /* @@@ the type is bogus */
+ tree v4sf_ftype_v4sf_pv2si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, pv2si_type_node, NULL_TREE);
+ tree void_ftype_pv2si_v4sf
+ = build_function_type_list (void_type_node,
+ pv2si_type_node, V4SF_type_node, NULL_TREE);
+ tree void_ftype_pfloat_v4sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V4SF_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree void_ftype_pv1di_v1di
+ = build_function_type_list (void_type_node,
+ pv1di_type_node, V1DI_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ tree void_ftype_pv2di_v2di
+ = build_function_type_list (void_type_node,
+ pv2di_type_node, V2DI_type_node, NULL_TREE);
+ /* Normal vector unops. */
+ tree v4sf_ftype_v4sf
+ = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ /* APPLE LOCAL begin mainline */
+ tree v16qi_ftype_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi
+ = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si
+ = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi
+ = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi
+ = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
+ /* APPLE LOCAL end mainline */
+
+ /* Normal vector binops. */
+ tree v4sf_ftype_v4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi_v8qi
+ = build_function_type_list (V8QI_type_node,
+ V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_v4hi
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_v2si
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree v1di_ftype_v1di_v1di
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 4684674 */
+ tree v1di_ftype_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4684674 */
+ /* APPLE LOCAL begin 4656532 */
+ tree v1di_ftype_v1di_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node,
+ V1DI_type_node,
+ integer_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 */
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ tree v2si_ftype_v2sf
+ = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
+ tree v2sf_ftype_v2si
+ = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si
+ = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v2sf_ftype_v2sf
+ = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree v2sf_ftype_v2sf_v2sf
+ = build_function_type_list (V2SF_type_node,
+ V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree v2si_ftype_v2sf_v2sf
+ = build_function_type_list (V2SI_type_node,
+ V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree pint_type_node = build_pointer_type (integer_type_node);
+ tree pdouble_type_node = build_pointer_type (double_type_node);
+ tree pcdouble_type_node = build_pointer_type (
+ build_type_variant (double_type_node, 1, 0));
+ tree int_ftype_v2df_v2df
+ = build_function_type_list (integer_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+
+ tree void_ftype_pcvoid
+ = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
+ tree v4sf_ftype_v4si
+ = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v4si_ftype_v4sf
+ = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v4si
+ = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
+ tree v4si_ftype_v2df
+ = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v2si_ftype_v2df
+ = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4sf_ftype_v2df
+ = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2si
+ = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
+ tree v2df_ftype_v4sf
+ = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
+ tree int_ftype_v2df
+ = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+ tree int64_ftype_v2df
+ = build_function_type_list (long_long_integer_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, integer_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int64
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, long_long_integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v2df
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v4sf
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_pcdouble
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, pcdouble_type_node, NULL_TREE);
+ tree void_ftype_pdouble_v2df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V2DF_type_node, NULL_TREE);
+ tree void_ftype_pint_int
+ = build_function_type_list (void_type_node,
+ pint_type_node, integer_type_node, NULL_TREE);
+ tree void_ftype_v16qi_v16qi_pchar
+ = build_function_type_list (void_type_node,
+ V16QI_type_node, V16QI_type_node,
+ pchar_type_node, NULL_TREE);
+ tree v2df_ftype_pcdouble
+ = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node, V2DI_type_node, NULL_TREE);
+ tree v2di_ftype_v2df_v2df
+ = build_function_type_list (V2DI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_int
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL begin mainline */
+ tree v2di_ftype_v2di_v2di_int
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
+ /* APPLE LOCAL end mainline */
+ tree v4si_ftype_v4si_int
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node, integer_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi_int
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node, integer_type_node, NULL_TREE);
+ tree v4si_ftype_v8hi_v8hi
+ = build_function_type_list (V4SI_type_node,
+ V8HI_type_node, V8HI_type_node, NULL_TREE);
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ tree v1di_ftype_v8qi_v8qi
+ = build_function_type_list (V1DI_type_node,
+ V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v1di_ftype_v2si_v2si
+ = build_function_type_list (V1DI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ tree v2di_ftype_v16qi_v16qi
+ = build_function_type_list (V2DI_type_node,
+ V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v2di_ftype_v4si_v4si
+ = build_function_type_list (V2DI_type_node,
+ V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree int_ftype_v16qi
+ = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
+ tree v16qi_ftype_pcchar
+ = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
+ tree void_ftype_pchar_v16qi
+ = build_function_type_list (void_type_node,
+ pchar_type_node, V16QI_type_node, NULL_TREE);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ tree v2di_ftype_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v2di_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v2di_v16qi
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree v8hi_ftype_v16qi
+ = build_function_type_list (V8HI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v16qi
+ = build_function_type_list (V4SI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v16qi
+ = build_function_type_list (V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8hi
+ = build_function_type_list (V4SI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v8hi
+ = build_function_type_list (V2DI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v4si
+ = build_function_type_list (V2DI_type_node, V4SI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_pv2di
+ = build_function_type_list (V2DI_type_node, pv2di_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi_int
+ = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ V16QI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ V16QI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v8hi_v8hi_int
+ = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ V8HI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v4si_v4si_int
+ = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ V4SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v2di_v2di
+ = build_function_type_list (integer_type_node,
+ V2DI_type_node, V2DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_v16qi_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ tree float80_type;
+ tree float128_type;
+ tree ftype;
+
+ /* The __float80 type. */
+ if (TYPE_MODE (long_double_type_node) == XFmode)
+ (*lang_hooks.types.register_builtin_type) (long_double_type_node,
+ "__float80");
+ else
+ {
+ float80_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (float80_type) = 80;
+ layout_type (float80_type);
+ (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
+ }
+
+ if (TARGET_64BIT)
+ {
+ float128_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (float128_type) = 128;
+ layout_type (float128_type);
+ (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
+ }
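+  /* Once registered, user code can spell these types directly:
+       __float80 x;     (XFmode, or the 80-bit REAL_TYPE built above)
+       __float128 y;    (TFmode; registered for 64-bit targets only)
+     The TFmode builtins named earlier (IX86_BUILTIN_INFQ and friends)
+     operate on __float128 values.  */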
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Add all SSE builtins that are more or less simple operations on
+ three operands. */
+ for (i = 0, d = bdesc_sse_3arg;
+ i < ARRAY_SIZE (bdesc_sse_3arg);
+ i++, d++)
+ {
+ /* Use one of the operands; the target can have a different mode for
+ mask-generating compares. */
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi_v16qi_int;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi_v8hi_int;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si_v4si_int;
+ break;
+ case V2DImode:
+ type = v2di_ftype_v2di_v2di_int;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df_v2df_int;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf_v4sf_int;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Override for variable blends. */
+ switch (d->icode)
+ {
+ case CODE_FOR_sse4_1_blendvpd:
+ type = v2df_ftype_v2df_v2df_v2df;
+ break;
+ case CODE_FOR_sse4_1_blendvps:
+ type = v4sf_ftype_v4sf_v4sf_v4sf;
+ break;
+ case CODE_FOR_sse4_1_pblendvb:
+ type = v16qi_ftype_v16qi_v16qi_v16qi;
+ break;
+ default:
+ break;
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
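+  /* As a concrete example of the loop above: __builtin_ia32_blendpd gets
+     the v2df (v2df, v2df, int) signature from its V2DFmode operand, while
+     the variable blend __builtin_ia32_blendvpd is overridden to
+     v2df (v2df, v2df, v2df), since its third operand is the xmm0 mask
+     rather than an immediate.  */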
+
+ /* Add all builtins that are more or less simple operations on two
+ operands. */
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ {
+ /* Use one of the operands; the target can have a different mode for
+ mask-generating compares. */
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi_v16qi;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi_v8hi;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si_v4si;
+ break;
+ case V2DImode:
+ type = v2di_ftype_v2di_v2di;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df_v2df;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf_v4sf;
+ break;
+ case V8QImode:
+ type = v8qi_ftype_v8qi_v8qi;
+ break;
+ case V4HImode:
+ type = v4hi_ftype_v4hi_v4hi;
+ break;
+ case V2SImode:
+ type = v2si_ftype_v2si_v2si;
+ break;
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ case V1DImode:
+ type = v1di_ftype_v1di_v1di;
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Override for comparisons. */
+ if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+ || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
+ type = v4si_ftype_v4sf_v4sf;
+
+ if (d->icode == CODE_FOR_sse2_maskcmpv2df3
+ || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
+ type = v2di_ftype_v2df_v2df;
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
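+  /* E.g. __builtin_ia32_addps is declared as v4sf (v4sf, v4sf) from its
+     V4SFmode operand, while the masked compare __builtin_ia32_cmpeqps
+     falls under the override above and gets v4si (v4sf, v4sf), since a
+     compare produces a bit mask rather than a float vector.  */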
+ /* APPLE LOCAL begin mainline */
+ /* Add all builtins that are more or less simple operations on 1 operand. */
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ {
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V16QImode:
+ type = v16qi_ftype_v16qi;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi;
+ break;
+ case V4SImode:
+ type = v4si_ftype_v4si;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf;
+ break;
+ case V8QImode:
+ type = v8qi_ftype_v8qi;
+ break;
+ case V4HImode:
+ type = v4hi_ftype_v4hi;
+ break;
+ case V2SImode:
+ type = v2si_ftype_v2si;
+ break;
+
+ default:
+	  gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* pcmpestr[im] insns. */
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
+ else
+ ftype = int_ftype_v16qi_int_v16qi_int_int;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
+
+ /* pcmpistr[im] insns. */
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ ftype = v16qi_ftype_v16qi_v16qi_int;
+ else
+ ftype = int_ftype_v16qi_v16qi_int;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
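+  /* So __builtin_ia32_pcmpistri128 is int (v16qi, v16qi, int), while
+     __builtin_ia32_pcmpistrm128 returns the v16qi match mask; the
+     flag-testing variants (pcmpistria128 and the like) share the int
+     signature.  The pcmpestr loop above is analogous, with the two
+     extra explicit-length int arguments.  */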
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* Add the remaining MMX insns with somewhat more complicated types. */
+ def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSLLW);
+ def_builtin (MASK_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
+ def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSLLD);
+ def_builtin (MASK_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
+ def_builtin (MASK_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
+
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRLW);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRLD);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
+
+ def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRAW);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
+ def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRAD);
+ def_builtin (MASK_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
+ def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
+
+ /* APPLE LOCAL 4299257 */
+ /* comi insns. */
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->mask == MASK_SSE2)
+ def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
+ else
+ def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
+
+ /* APPLE LOCAL begin 4299257 */
+ /* ucomi insns. */
+ for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
+ if (d->mask == MASK_SSE2)
+ def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
+ else
+ def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
+ /* APPLE LOCAL end 4299257 */
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* ptest insns. */
+ for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
+ def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
+ def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
+ def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+ def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+ def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+ def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
+ def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+ def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pv1di_v1di, IX86_BUILTIN_MOVNTQ);
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
+
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+
+ def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
+ def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
+ def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
+
+ def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
+
+ /* Original 3DNow! */
+ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
+
+ /* 3DNow! extension as used in the Athlon CPU. */
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
+
+ /* SSE2 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
+ def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+ def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+ def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+ def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
+ def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
+ def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
+ def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
+
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
+ /* APPLE LOCAL 5919583 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128_BYTESHIFT);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
+ /* APPLE LOCAL 5919583 */
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128_BYTESHIFT);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
+ def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
+
+ def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
+
+ /* Prescott New Instructions. */
+ def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
+ void_ftype_pcvoid_unsigned_unsigned,
+ IX86_BUILTIN_MONITOR);
+ def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
+ void_ftype_unsigned_unsigned,
+ IX86_BUILTIN_MWAIT);
+ def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
+ v4sf_ftype_v4sf,
+ IX86_BUILTIN_MOVSHDUP);
+ def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
+ v4sf_ftype_v4sf,
+ IX86_BUILTIN_MOVSLDUP);
+ def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
+ v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
+
+ /* APPLE LOCAL begin 4099020 */
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_movqv4si", ftype, IX86_BUILTIN_MOVQ);
+ ftype = build_function_type_list (V4SI_type_node, pv2si_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_loadlv4si", ftype, IX86_BUILTIN_LOADQ);
+ ftype = build_function_type_list (void_type_node, pv2si_type_node, V4SI_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_storelv4si", ftype, IX86_BUILTIN_STOREQ);
+ /* APPLE LOCAL end 4099020 */
+
+ /* APPLE LOCAL begin 4656532 */
+ /* Merom New Instructions. */
+ def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
+ v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
+ def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", v1di_ftype_v1di_v1di_int,
+ IX86_BUILTIN_PALIGNR);
+
+ /* APPLE LOCAL end 4656532 */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* SSE4.1. */
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
+ def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
+
+ /* SSE4.2. */
+ ftype = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_char_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
+ ftype = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
+ ftype = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
+ ftype = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
+
+  /* AMDFAM10 SSE4A new built-ins. */
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
+ def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* Access to the vec_init patterns. */
+ ftype = build_function_type_list (V2SI_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
+ ftype, IX86_BUILTIN_VEC_INIT_V2SI);
+
+ ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
+ ftype, IX86_BUILTIN_VEC_INIT_V4HI);
+
+ ftype = build_function_type_list (V8QI_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
+ ftype, IX86_BUILTIN_VEC_INIT_V8QI);
+
+ /* Access to the vec_extract patterns. */
+ ftype = build_function_type_list (double_type_node, V2DF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
+ ftype, IX86_BUILTIN_VEC_EXT_V2DF);
+
+ ftype = build_function_type_list (long_long_integer_type_node,
+ V2DI_type_node, integer_type_node,
+ NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
+ ftype, IX86_BUILTIN_VEC_EXT_V2DI);
+
+ ftype = build_function_type_list (float_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
+ ftype, IX86_BUILTIN_VEC_EXT_V4SF);
+
+ ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
+ ftype, IX86_BUILTIN_VEC_EXT_V4SI);
+
+ /* APPLE LOCAL begin radar 4469713 */
+ /* The return type of the builtin function should be an unsigned instead
+ of a signed type. */
+ ftype = build_function_type_list (unsigned_intHI_type_node, V8HI_type_node,
+ /* APPLE LOCAL end radar 4469713 */
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
+ ftype, IX86_BUILTIN_VEC_EXT_V8HI);
+
+ ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
+ ftype, IX86_BUILTIN_VEC_EXT_V4HI);
+
+ ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
+ ftype, IX86_BUILTIN_VEC_EXT_V2SI);
+
+ ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ /* Access to the vec_set patterns. */
+ ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ intDI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
+
+ ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
+ float_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
+
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ intSI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
+ ftype, IX86_BUILTIN_VEC_SET_V8HI);
+
+ ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
+ ftype, IX86_BUILTIN_VEC_SET_V4HI);
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ intQI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+}
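+
+/* Illustrative only, not part of the compiler: once the registrations
+   above have run, a translation unit compiled with -msse can write
+
+     typedef float v4sf __attribute__ ((vector_size (16)));
+     v4sf x = { 1, 4, 9, 16 };
+     v4sf r = __builtin_ia32_sqrtps (x);
+
+   which type-checks against v4sf_ftype_v4sf and is expanded through
+   IX86_BUILTIN_SQRTPS by ix86_expand_builtin below.  */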
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+ where we expect a vector. To avoid crashing, use one of the vector
+ clear instructions. */
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+ if (x == const0_rtx)
+ x = CONST0_RTX (mode);
+ return x;
+}
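+
+/* A sketch of the failure mode this guards against (illustrative): on
+   an erroneous source operand, expand_normal can hand back const0_rtx
+   where, say, V4SFmode was expected, in which case
+
+     op0 = safe_vector_operand (op0, V4SFmode);
+
+   substitutes CONST0_RTX (V4SFmode), the all-zeros vector constant,
+   so the expanders below never see a scalar zero in a vector slot.  */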
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Subroutine of ix86_expand_builtin to take care of SSE insns with
+   4 operands. The third argument must be a constant that fits in 8
+   bits, or xmm0. */
+
+static rtx
+ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+ enum machine_mode mode3 = insn_data[icode].operand[3].mode;
+
+ if (VECTOR_MODE_P (mode1))
+ op0 = safe_vector_operand (op0, mode1);
+ if (VECTOR_MODE_P (mode2))
+ op1 = safe_vector_operand (op1, mode2);
+ if (VECTOR_MODE_P (mode3))
+ op2 = safe_vector_operand (op2, mode3);
+
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if ((optimize && !register_operand (op1, mode2))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_blendvpd:
+ case CODE_FOR_sse4_1_blendvps:
+ case CODE_FOR_sse4_1_pblendvb:
+ op2 = copy_to_mode_reg (mode3, op2);
+ break;
+
+ case CODE_FOR_sse4_1_roundsd:
+ case CODE_FOR_sse4_1_roundss:
+ error ("the third argument must be a 4-bit immediate");
+ return const0_rtx;
+
+ default:
+ error ("the third argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
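+
+/* Dispatch sketch for the switch above (illustrative): for
+   __builtin_ia32_blendvpd the third operand is copied into a register,
+   since the blendv insns want it in xmm0, whereas a call such as
+
+     __m128d r = __builtin_ia32_roundsd (a, b, 0x0);
+
+   accepts only a 4-bit rounding-mode immediate and anything else is
+   diagnosed rather than silently truncated.  */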
+
+/* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
+
+static rtx
+ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (optimize
+ || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
+ }
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
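+
+/* Intended use, as a sketch rather than compiler code: the crc32
+   builtins accumulate one unit per call, e.g.
+
+     unsigned crc = 0xffffffffU;
+     crc = __builtin_ia32_crc32qi (crc, (unsigned char) byte);
+
+   feeds each input byte through the SSE4.2 crc32 instruction.  */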
+
+/* Subroutine of ix86_expand_builtin to take care of binop insns. */
+
+static rtx
+ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
+{
+ rtx pat, xops[3];
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (GET_MODE (op1) == SImode && mode1 == TImode)
+ {
+ rtx x = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadd (x, op1));
+ op1 = gen_lowpart (TImode, x);
+ }
+
+ /* The insn must want input operands in the same modes as the
+ result. */
+ gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
+ && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
+
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ /* ??? Using ix86_fixup_binary_operands is problematic when
+ we've got mismatched modes. Fake it. */
+
+ xops[0] = target;
+ xops[1] = op0;
+ xops[2] = op1;
+
+ if (tmode == mode0 && tmode == mode1)
+ {
+ target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
+ op0 = xops[1];
+ op1 = xops[2];
+ }
+ else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
+ {
+ op0 = force_reg (mode0, op0);
+ op1 = force_reg (mode1, op1);
+ target = gen_reg_rtx (tmode);
+ }
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of stores. */
+
+static rtx
+ix86_expand_store_builtin (enum insn_code icode, tree arglist)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (op0, op1);
+ if (pat)
+ emit_insn (pat);
+ return 0;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of unop insns. */
+
+static rtx
+ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
+ rtx target, int do_load)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ rtx op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ if (do_load)
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ else
+ {
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_roundpd:
+ case CODE_FOR_sse4_1_roundps:
+ {
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ {
+ error ("the second argument must be a 4-bit immediate");
+ return const0_rtx;
+ }
+ pat = GEN_FCN (icode) (target, op0, op1);
+ }
+ break;
+ default:
+ pat = GEN_FCN (icode) (target, op0);
+ break;
+ }
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
+ sqrtss, rsqrtss, rcpss. */
+
+static rtx
+ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ rtx op1, op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ op1 = op0;
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
+ op1 = copy_to_mode_reg (mode0, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
+
+static rtx
+ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = gen_reg_rtx (mode1);
+ emit_move_insn (tmp, op1);
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
+ pat = GEN_FCN (d->icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comi insns. */
+
+static rtx
+ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
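+
+/* Sketch of what this emits (illustrative): a comis[sd] of op0/op1,
+   then a setcc of d->comparison into the low byte of a zeroed SImode
+   pseudo via STRICT_LOW_PART, whose SUBREG_REG is returned as the int
+   result.  */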
+
+/* APPLE LOCAL begin 4299257 */
+/* Subroutine of ix86_expand_builtin to take care of ucomi insns. */
+
+static rtx
+ix86_expand_sse_ucomi (const struct builtin_description *d, tree arglist,
+ rtx target)
+{
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode scalar_mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ gcc_assert (mode0 == V4SFmode || mode0 == V2DFmode);
+ gcc_assert (mode1 == V4SFmode || mode1 == V2DFmode);
+
+ scalar_mode = (mode0 == V4SFmode) ? SFmode : DFmode;
+ op0 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode0, op0), 0);
+ op1 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode1, op1), 0);
+
+ ix86_compare_op0 = op0;
+ ix86_compare_op1 = op1;
+ if (ix86_expand_setcc (comparison, target))
+ return SUBREG_REG (target);
+
+ return NULL_RTX;
+}
+/* APPLE LOCAL end 4299257 */
+
+/* APPLE LOCAL begin 5612787 mainline sse4 */
+/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+ tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp))));
+ tree arg4 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp)))));
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ {
+ error ("the fifth argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (exp);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
+ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ {
+ error ("the third argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+/* APPLE LOCAL end 5612787 mainline sse4 */
+
+/* Return the integer constant in ARG. Constrain it to be in the range
+ of the subparts of VEC_TYPE; issue an error if not. */
+
+static int
+get_element_number (tree vec_type, tree arg)
+{
+ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
+
+ if (!host_integerp (arg, 1)
+ || (elt = tree_low_cst (arg, 1), elt > max))
+ {
+ error ("selector must be an integer constant in the range 0..%wi", max);
+ return 0;
+ }
+
+ return elt;
+}
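+
+/* Illustrative: for a V4SI vector type TYPE_VECTOR_SUBPARTS is 4, so
+   selectors 0..3 are accepted and a constant selector of, say, 7 is
+   rejected with the range error above.  */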
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_init. We DO have language-level syntax for this, in
+ the form of (type){ init-list }. Except that since we can't place emms
+ instructions from inside the compiler, we can't allow the use of MMX
+ registers unless the user explicitly asks for it. So we do *not* define
+ vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
+   we have builtins invoked by mmintrin.h that give us license to emit
+ these sorts of instructions. */
+
+static rtx
+ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
+{
+ enum machine_mode tmode = TYPE_MODE (type);
+ enum machine_mode inner_mode = GET_MODE_INNER (tmode);
+ int i, n_elt = GET_MODE_NUNITS (tmode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
+ {
+ rtx x = expand_normal (TREE_VALUE (arglist));
+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ }
+
+ gcc_assert (arglist == NULL);
+
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
+ return target;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
+ had a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_ext_builtin (tree arglist, rtx target)
+{
+ enum machine_mode tmode, mode0;
+ tree arg0, arg1;
+ int elt;
+ rtx op0;
+
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+
+ op0 = expand_normal (arg0);
+ elt = get_element_number (TREE_TYPE (arg0), arg1);
+
+ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ mode0 = TYPE_MODE (TREE_TYPE (arg0));
+ gcc_assert (VECTOR_MODE_P (mode0));
+
+ op0 = force_reg (mode0, op0);
+
+ if (optimize || !target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_extract (true, target, op0, elt);
+
+ return target;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
+ a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_set_builtin (tree arglist)
+{
+ enum machine_mode tmode, mode1;
+ tree arg0, arg1, arg2;
+ int elt;
+ rtx op0, op1, target;
+
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+
+ tmode = TYPE_MODE (TREE_TYPE (arg0));
+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
+ elt = get_element_number (TREE_TYPE (arg0), arg2);
+
+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+ op0 = force_reg (tmode, op0);
+ op1 = force_reg (mode1, op1);
+
+ /* OP0 is the source of these builtin functions and shouldn't be
+ modified. Create a copy, use it and return it as target. */
+ target = gen_reg_rtx (tmode);
+ emit_move_insn (target, op0);
+ ix86_expand_vector_set (true, target, op1, elt);
+
+ return target;
+}
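+
+/* E.g. (illustrative) __builtin_ia32_vec_set_v8hi (v, x, 3) yields a
+   fresh copy of V with element 3 replaced by X; V itself is never
+   modified, which is what wrappers such as _mm_insert_epi16 rely
+   on.  */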
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ const struct builtin_description *d;
+ size_t i;
+ enum insn_code icode;
+ tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+ tree arglist = TREE_OPERAND (exp, 1);
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ tree arg0, arg1, arg2, arg3;
+ rtx op0, op1, op2, op3, pat;
+ /* APPLE LOCAL ssse3 */
+ enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case IX86_BUILTIN_EMMS:
+ emit_insn (gen_mmx_emms ());
+ return 0;
+
+ case IX86_BUILTIN_SFENCE:
+ emit_insn (gen_sse_sfence ());
+ return 0;
+
+ case IX86_BUILTIN_MASKMOVQ:
+ case IX86_BUILTIN_MASKMOVDQU:
+ icode = (fcode == IX86_BUILTIN_MASKMOVQ
+ ? CODE_FOR_mmx_maskmovq
+ : CODE_FOR_sse2_maskmovdqu);
+ /* Note the arg order is different from the operand order. */
+ arg1 = TREE_VALUE (arglist);
+ arg2 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ op0 = force_reg (Pmode, op0);
+ op0 = gen_rtx_MEM (mode1, op0);
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ pat = GEN_FCN (icode) (op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return 0;
+
+ case IX86_BUILTIN_SQRTSS:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
+ case IX86_BUILTIN_RSQRTSS:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
+ case IX86_BUILTIN_RCPSS:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
+
+ /* APPLE LOCAL begin 4099020 */
+ case IX86_BUILTIN_LOADQ:
+ return ix86_expand_unop_builtin (CODE_FOR_sse_loadqv4si, arglist, target, 1);
+
+ case IX86_BUILTIN_MOVQ:
+ return ix86_expand_unop_builtin (CODE_FOR_sse_movqv4si, arglist, target, 0);
+
+ case IX86_BUILTIN_STOREQ:
+ return ix86_expand_store_builtin (CODE_FOR_sse_storeqv4si, arglist);
+ /* APPLE LOCAL end 4099020 */
+
+ case IX86_BUILTIN_LOADUPS:
+ return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
+
+ case IX86_BUILTIN_STOREUPS:
+ return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
+
+ case IX86_BUILTIN_LOADHPS:
+ case IX86_BUILTIN_LOADLPS:
+ case IX86_BUILTIN_LOADHPD:
+ case IX86_BUILTIN_LOADLPD:
+ icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
+ : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
+ : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
+ : CODE_FOR_sse2_loadlpd);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ op0 = force_reg (mode0, op0);
+ op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_STOREHPS:
+ case IX86_BUILTIN_STORELPS:
+ icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
+ : CODE_FOR_sse_storelps);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ op1 = force_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return const0_rtx;
+
+ case IX86_BUILTIN_MOVNTPS:
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
+ case IX86_BUILTIN_MOVNTQ:
+ /* APPLE LOCAL 4656532 use V1DImode for _m64 */
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntv1di, arglist);
+
+ case IX86_BUILTIN_LDMXCSR:
+ op0 = expand_normal (TREE_VALUE (arglist));
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_move_insn (target, op0);
+ emit_insn (gen_sse_ldmxcsr (target));
+ return 0;
+
+ case IX86_BUILTIN_STMXCSR:
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_insn (gen_sse_stmxcsr (target));
+ return copy_to_mode_reg (SImode, target);
+
+ case IX86_BUILTIN_SHUFPS:
+ case IX86_BUILTIN_SHUFPD:
+ icode = (fcode == IX86_BUILTIN_SHUFPS
+ ? CODE_FOR_sse_shufps
+ : CODE_FOR_sse2_shufpd);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+ {
+ /* @@@ better error message */
+ error ("mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_PSHUFW:
+ case IX86_BUILTIN_PSHUFD:
+ case IX86_BUILTIN_PSHUFHW:
+ case IX86_BUILTIN_PSHUFLW:
+ icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
+ : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
+ : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
+ : CODE_FOR_mmx_pshufw);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ /* @@@ better error message */
+ error ("mask must be an immediate");
+ return const0_rtx;
+ }
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_PSLLWI128:
+ icode = CODE_FOR_ashlv8hi3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSLLDI128:
+ icode = CODE_FOR_ashlv4si3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSLLQI128:
+ icode = CODE_FOR_ashlv2di3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRAWI128:
+ icode = CODE_FOR_ashrv8hi3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRADI128:
+ icode = CODE_FOR_ashrv4si3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRLWI128:
+ icode = CODE_FOR_lshrv8hi3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRLDI128:
+ icode = CODE_FOR_lshrv4si3;
+ goto do_pshifti;
+ case IX86_BUILTIN_PSRLQI128:
+ icode = CODE_FOR_lshrv2di3;
+ goto do_pshifti;
+ do_pshifti:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ /* APPLE LOCAL begin radar 5543378 mainline candidate */
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
+ op1 = GEN_INT (255);
+ }
+ else
+ {
+ mode2 = insn_data[icode].operand[2].mode;
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ op1 = copy_to_reg (op1);
+ if (GET_MODE (op1) != mode2)
+ op1 = convert_to_mode (mode2, op1, 0);
+ }
+ }
+ /* APPLE LOCAL end radar 5543378 mainline candidate */
+
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_reg (op0);
+
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_PSLLW128:
+ icode = CODE_FOR_ashlv8hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLD128:
+ icode = CODE_FOR_ashlv4si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLQ128:
+ icode = CODE_FOR_ashlv2di3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAW128:
+ icode = CODE_FOR_ashrv8hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAD128:
+ icode = CODE_FOR_ashrv4si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLW128:
+ icode = CODE_FOR_lshrv8hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLD128:
+ icode = CODE_FOR_lshrv4si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLQ128:
+ icode = CODE_FOR_lshrv2di3;
+ goto do_pshift;
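+ /* Shared expansion for the variable-count vector shifts. Unlike the
+ do_pshifti cases, the count is taken from a register; the hardware
+ reads it from an XMM register, hence the TImode subreg below. */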
+ do_pshift:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_reg (op0);
+
+ op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+ op1 = copy_to_reg (op1);
+
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ /* APPLE LOCAL begin 5919583 */
+ case IX86_BUILTIN_PSLLDQI128:
+ case IX86_BUILTIN_PSRLDQI128:
+ case IX86_BUILTIN_PSLLDQI128_BYTESHIFT:
+ case IX86_BUILTIN_PSRLDQI128_BYTESHIFT:
+ icode = ((fcode == IX86_BUILTIN_PSLLDQI128
+ || fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT)
+ ? CODE_FOR_sse2_ashlti3
+ : CODE_FOR_sse2_lshrti3);
+ /* APPLE LOCAL end 5919583 */
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ /* APPLE LOCAL begin 5919583 */
+ if (! CONST_INT_P (op1))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ /* The _mm_srli_si128/_mm_slli_si128 primitives are defined with
+ a byte-shift count; inside GCC, we prefer to specify the
+ width of a shift in bits. The original non-BYTESHIFT
+ primitives were problematic due to the "*8" in their macro
+ bodies; we have moved the "*8" here to resolve this. The
+ original builtins are still supported because many developers
+ rely upon them. */
+ if (fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT
+ || fcode == IX86_BUILTIN_PSRLDQI128_BYTESHIFT)
+ op1 = GEN_INT (INTVAL (op1) * 8);
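+ /* For example, _mm_slli_si128 (x, 3) arrives at the BYTESHIFT
+ builtin with op1 == 3 and becomes a 24-bit shift here. */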
+ /* APPLE LOCAL end 5919583 */
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+ }
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ target = gen_reg_rtx (V2DImode);
+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
+ op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_FEMMS:
+ emit_insn (gen_mmx_femms ());
+ return NULL_RTX;
+
+ case IX86_BUILTIN_PAVGUSB:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
+
+ case IX86_BUILTIN_PF2ID:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
+
+ case IX86_BUILTIN_PFACC:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFADD:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPEQ:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPGE:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPGT:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMAX:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMIN:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMUL:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRCP:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_PFRCPIT1:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRCPIT2:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRSQIT1:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRSQRT:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_PFSUB:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFSUBR:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PI2FD:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
+
+ case IX86_BUILTIN_PMULHRW:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
+
+ case IX86_BUILTIN_PF2IW:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
+
+ case IX86_BUILTIN_PFNACC:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFPNACC:
+ return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PI2FW:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
+
+ case IX86_BUILTIN_PSWAPDSI:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
+
+ case IX86_BUILTIN_PSWAPDSF:
+ return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_SQRTSD:
+ return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
+ case IX86_BUILTIN_LOADUPD:
+ return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
+ case IX86_BUILTIN_STOREUPD:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
+
+ case IX86_BUILTIN_MFENCE:
+ emit_insn (gen_sse2_mfence ());
+ return 0;
+ case IX86_BUILTIN_LFENCE:
+ emit_insn (gen_sse2_lfence ());
+ return 0;
+
+ case IX86_BUILTIN_CLFLUSH:
+ arg0 = TREE_VALUE (arglist);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_sse2_clflush;
+ if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+
+ emit_insn (gen_sse2_clflush (op0));
+ return 0;
+
+ case IX86_BUILTIN_MOVNTPD:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
+ case IX86_BUILTIN_MOVNTDQ:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
+ case IX86_BUILTIN_MOVNTI:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
+
+ case IX86_BUILTIN_LOADDQU:
+ return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
+ case IX86_BUILTIN_STOREDQU:
+ return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
+
+ case IX86_BUILTIN_MONITOR:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ if (!REG_P (op2))
+ op2 = copy_to_mode_reg (SImode, op2);
+ if (!TARGET_64BIT)
+ emit_insn (gen_sse3_monitor (op0, op1, op2));
+ else
+ emit_insn (gen_sse3_monitor64 (op0, op1, op2));
+ return 0;
+
+ case IX86_BUILTIN_MWAIT:
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ emit_insn (gen_sse3_mwait (op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_LDDQU:
+ return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
+ target, 1);
+ /* APPLE LOCAL begin mainline */
+ case IX86_BUILTIN_PALIGNR:
+ case IX86_BUILTIN_PALIGNR128:
+ if (fcode == IX86_BUILTIN_PALIGNR)
+ {
+ /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
+ icode = CODE_FOR_ssse3_palignrv1di;
+ mode = V1DImode;
+ /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
+ }
+ else
+ {
+ icode = CODE_FOR_ssse3_palignrti;
+ mode = V2DImode;
+ }
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+ }
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
+ }
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("shift must be an immediate");
+ return const0_rtx;
+ }
+ target = gen_reg_rtx (mode);
+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
+ op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ /* APPLE LOCAL end mainline */
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case IX86_BUILTIN_MOVNTDQA:
+ return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, arglist,
+ target, 1);
+
+ case IX86_BUILTIN_MOVNTSD:
+ return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
+
+ case IX86_BUILTIN_MOVNTSS:
+ return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
+
+ case IX86_BUILTIN_INSERTQ:
+ case IX86_BUILTIN_EXTRQ:
+ icode = (fcode == IX86_BUILTIN_EXTRQ
+ ? CODE_FOR_sse4a_extrq
+ : CODE_FOR_sse4a_insertq);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_EXTRQI:
+ icode = CODE_FOR_sse4a_extrqi;
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ error ("index mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("length mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+
+ case IX86_BUILTIN_INSERTQI:
+ icode = CODE_FOR_sse4a_insertqi;
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ op1 = copy_to_mode_reg (mode2, op1);
+
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+ {
+ error ("index mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
+ {
+ error ("length mask must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ case IX86_BUILTIN_VEC_INIT_V2SI:
+ case IX86_BUILTIN_VEC_INIT_V4HI:
+ case IX86_BUILTIN_VEC_INIT_V8QI:
+ return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
+
+ case IX86_BUILTIN_VEC_EXT_V2DF:
+ case IX86_BUILTIN_VEC_EXT_V2DI:
+ case IX86_BUILTIN_VEC_EXT_V4SF:
+ case IX86_BUILTIN_VEC_EXT_V4SI:
+ case IX86_BUILTIN_VEC_EXT_V8HI:
+ case IX86_BUILTIN_VEC_EXT_V16QI:
+ case IX86_BUILTIN_VEC_EXT_V2SI:
+ case IX86_BUILTIN_VEC_EXT_V4HI:
+ return ix86_expand_vec_ext_builtin (arglist, target);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case IX86_BUILTIN_VEC_SET_V2DI:
+ case IX86_BUILTIN_VEC_SET_V4SF:
+ case IX86_BUILTIN_VEC_SET_V4SI:
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ case IX86_BUILTIN_VEC_SET_V8HI:
+ case IX86_BUILTIN_VEC_SET_V4HI:
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ case IX86_BUILTIN_VEC_SET_V16QI:
+ return ix86_expand_vec_set_builtin (arglist);
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case IX86_BUILTIN_INFQ:
+ {
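+ /* Materialize +Inf by forcing it into the constant pool and
+ loading it from memory. */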
+ REAL_VALUE_TYPE inf;
+ rtx tmp;
+
+ real_inf (&inf);
+ tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
+
+ tmp = validize_mem (force_const_mem (mode, tmp));
+
+ if (target == 0)
+ target = gen_reg_rtx (mode);
+
+ emit_move_insn (target, tmp);
+ return target;
+ }
+
+ case IX86_BUILTIN_FABSQ:
+ return ix86_expand_unop_builtin (CODE_FOR_abstf2, arglist, target, 0);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ default:
+ break;
+ }
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ for (i = 0, d = bdesc_sse_3arg;
+ i < ARRAY_SIZE (bdesc_sse_3arg);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_4_operands_builtin (d->icode,
+ arglist,
+ target);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ {
+ /* Compares are treated specially. */
+ if (d->icode == CODE_FOR_sse_maskcmpv4sf3
+ || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
+ || d->icode == CODE_FOR_sse2_maskcmpv2df3
+ || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
+ return ix86_expand_sse_compare (d, arglist, target);
+
+ return ix86_expand_binop_builtin (d->icode, arglist, target);
+ }
+
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
+
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_comi (d, arglist, target);
+
+ /* APPLE LOCAL begin 4299257 */
+ for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_ucomi (d, arglist, target);
+ /* APPLE LOCAL end 4299257 */
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_ptest (d, arglist, target);
+
+ for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_crc32 (d->icode, arglist, target);
+
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpestr (d, arglist, target);
+
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpistr (d, arglist, target);
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+
+ gcc_unreachable ();
+}
+
+/* Store OPERAND to memory after reload is completed. This means
+ that we can't easily use assign_stack_local. */
+rtx
+ix86_force_to_memory (enum machine_mode mode, rtx operand)
+{
+ rtx result;
+
+ gcc_assert (reload_completed);
+ if (TARGET_RED_ZONE)
+ {
+ result = gen_rtx_MEM (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-RED_ZONE_SIZE)));
+ emit_move_insn (result, operand);
+ }
+ else if (!TARGET_RED_ZONE && TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ operand = gen_lowpart (DImode, operand);
+ /* FALLTHRU */
+ case DImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DImode,
+ gen_rtx_PRE_DEC (DImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ else
+ {
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx operands[2];
+ split_di (&operand, 1, operands, operands + 1);
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[1]));
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[0]));
+ }
+ break;
+ case HImode:
+ /* Store HImodes as SImodes. */
+ operand = gen_lowpart (SImode, operand);
+ /* FALLTHRU */
+ case SImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (operand),
+ gen_rtx_PRE_DEC (SImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ return result;
+}
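+
+/* Note that ix86_force_to_memory pairs with ix86_free_from_memory below:
+ when no red zone is available the store above is an implicit push, and
+ the caller must arrange for the matching deallocation. */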
+
+/* Free the operand from memory. */
+void
+ix86_free_from_memory (enum machine_mode mode)
+{
+ if (!TARGET_RED_ZONE)
+ {
+ int size;
+
+ if (mode == DImode || TARGET_64BIT)
+ size = 8;
+ else
+ size = 4;
+ /* Use LEA to deallocate stack space. In peephole2 it will be converted
+ to a pop or add instruction if registers are available. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (size))));
+ }
+}
+
+/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
+ QImode must go into class Q_REGS.
+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
+ movdf to do mem-to-mem moves through integer regs. */
+enum reg_class
+ix86_preferred_reload_class (rtx x, enum reg_class class)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* We're only allowed to return a subclass of CLASS. Many of the
+ following checks fail for NO_REGS, so eliminate that early. */
+ if (class == NO_REGS)
+ return NO_REGS;
+
+ /* All classes can load zeros. */
+ if (x == CONST0_RTX (mode))
+ return class;
+
+ /* Force constants into memory if we are loading a (nonzero) constant into
+ an MMX or SSE register. This is because there are no MMX/SSE instructions
+ to load from a constant. */
+ if (CONSTANT_P (x)
+ && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
+ return NO_REGS;
+
+ /* APPLE LOCAL begin */
+ /* MERGE FIXME - ensure that 3501055 is fixed. */
+ /* MERGE FIXME - ensure that 4206991 is fixed. */
+ /* APPLE LOCAL end */
+ /* Prefer SSE regs only, if we can use them for math. */
+ if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
+ return SSE_CLASS_P (class) ? class : NO_REGS;
+
+ /* Floating-point constants need more complex checks. */
+ if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
+ {
+ /* General regs can load everything. */
+ if (reg_class_subset_p (class, GENERAL_REGS))
+ return class;
+
+ /* Floats can load 0 and 1 plus some others. Note that we eliminated
+ zero above. We only want to wind up preferring 80387 registers if
+ we plan on doing computation with them. */
+ if (TARGET_80387
+ && standard_80387_constant_p (x))
+ {
+ /* Limit class to non-sse. */
+ if (class == FLOAT_SSE_REGS)
+ return FLOAT_REGS;
+ if (class == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ if (class == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
+ return class;
+ }
+
+ return NO_REGS;
+ }
+
+ /* Generally when we see PLUS here, it's the function invariant
+ (plus soft-fp const_int), which can only be computed into general
+ regs. */
+ if (GET_CODE (x) == PLUS)
+ return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
+
+ /* QImode constants are easy to load, but non-constant QImode data
+ must go into Q_REGS. */
+ if (GET_MODE (x) == QImode && !CONSTANT_P (x))
+ {
+ if (reg_class_subset_p (class, Q_REGS))
+ return class;
+ if (reg_class_subset_p (Q_REGS, class))
+ return Q_REGS;
+ return NO_REGS;
+ }
+
+ return class;
+}
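+
+/* For example, a nonzero CONST_DOUBLE headed for an SSE register yields
+ NO_REGS above, which makes reload spill the constant to the constant
+ pool and load it from memory instead. */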
+
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+enum reg_class
+ix86_preferred_output_reload_class (rtx x, enum reg_class class)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* Restrict the output reload class to the register bank that we are doing
+ math on. If we would like not to return a subset of CLASS, reject this
+ alternative: if reload cannot do this, it will still use its choice. */
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
+
+ if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
+ {
+ if (class == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ else if (class == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ else
+ return FLOAT_CLASS_P (class) ? class : NO_REGS;
+ }
+
+ return class;
+}
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers.
+
+ The macro can't work reliably when one of the CLASSES is a class
+ containing registers from multiple units (SSE, MMX, integer). We avoid
+ this by never combining those units in a single alternative in the
+ machine description. Ensure that this constraint holds to avoid
+ unexpected surprises.
+
+ When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
+ enforce these sanity checks. */
+
+int
+ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+ enum machine_mode mode, int strict)
+{
+ if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
+ || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
+ || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
+ || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
+ || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
+ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
+ {
+ gcc_assert (!strict);
+ return true;
+ }
+
+ if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
+ return true;
+
+ /* ??? This is a lie. We do have moves between mmx/general, and for
+ mmx/sse2. But by saying we need secondary memory we discourage the
+ register allocator from using the mmx registers unless needed. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
+ return true;
+
+ if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+ {
+ /* SSE1 doesn't have any direct moves from other classes. */
+ if (!TARGET_SSE2)
+ return true;
+
+ /* If the target says that inter-unit moves are more expensive
+ than moving through memory, then don't generate them. */
+ if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
+ return true;
+
+ /* Between SSE and general, we have moves no larger than word size. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return true;
+
+ /* ??? For the cost of one register reformat penalty, we could use
+ the same instructions to move SFmode and DFmode data, but the
+ relevant move patterns don't support those alternatives. */
+ if (mode == SFmode || mode == DFmode)
+ return true;
+ }
+
+ return false;
+}
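+
+/* For example, a DFmode move between SSE_REGS and GENERAL_REGS returns
+ true above even on SSE2 targets, so such copies always go through
+ memory. */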
+
+/* Return true if the registers in CLASS cannot represent the change from
+ modes FROM to TO. */
+
+bool
+ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class class)
+{
+ if (from == to)
+ return false;
+
+ /* x87 registers can't do subreg at all, as all values are reformatted
+ to extended precision. */
+ if (MAYBE_FLOAT_CLASS_P (class))
+ return true;
+
+ if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
+ {
+ /* Vector registers do not support QI or HImode loads. If we don't
+ disallow a change to these modes, reload will assume it's ok to
+ drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
+ the vec_dupv4hi pattern. */
+ if (GET_MODE_SIZE (from) < 4)
+ return true;
+
+ /* Vector registers do not support subreg with nonzero offsets, which
+ are otherwise valid for integer registers. Since we can't see
+ whether we have a nonzero offset from here, prohibit all
+ nonparadoxical subregs changing size. */
+ if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return the cost of moving data from a register in class CLASS1 to
+ one in class CLASS2.
+
+ It is not required that the cost always equal 2 when FROM is the same as TO;
+ on some machines it is expensive to move between registers if they are not
+ general registers. */
+
+int
+ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
+ enum reg_class class2)
+{
+ /* In case we require secondary memory, compute the cost of the store
+ followed by the load. In order to avoid bad register allocation choices,
+ we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
+
+ if (ix86_secondary_memory_needed (class1, class2, mode, 0))
+ {
+ int cost = 1;
+
+ cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
+ MEMORY_MOVE_COST (mode, class1, 1));
+ cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
+ MEMORY_MOVE_COST (mode, class2, 1));
+
+ /* When copying from a general-purpose register we may emit multiple
+ stores followed by a single load, causing a memory size mismatch
+ stall. Count this as an arbitrarily high cost of 20. */
+ if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ cost += 20;
+
+ /* In the case of FP/MMX moves, the registers actually overlap, and we
+ have to switch modes in order to treat them differently. */
+ if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
+ || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
+ cost += 20;
+
+ return cost;
+ }
+
+ /* Moves between SSE/MMX and integer unit are expensive. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
+ || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+ return ix86_cost->mmxsse_to_integer;
+ if (MAYBE_FLOAT_CLASS_P (class1))
+ return ix86_cost->fp_move;
+ if (MAYBE_SSE_CLASS_P (class1))
+ return ix86_cost->sse_move;
+ if (MAYBE_MMX_CLASS_P (class1))
+ return ix86_cost->mmx_move;
+ return 2;
+}
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+bool
+ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ /* Flags and only flags can only hold CCmode values. */
+ if (CC_REGNO_P (regno))
+ return GET_MODE_CLASS (mode) == MODE_CC;
+ if (GET_MODE_CLASS (mode) == MODE_CC
+ || GET_MODE_CLASS (mode) == MODE_RANDOM
+ || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+ return 0;
+ if (FP_REGNO_P (regno))
+ return VALID_FP_MODE_P (mode);
+ if (SSE_REGNO_P (regno))
+ {
+ /* We implement the move patterns for all vector modes into and
+ out of SSE registers, even when no operation instructions
+ are available. */
+ return (VALID_SSE_REG_MODE (mode)
+ || VALID_SSE2_REG_MODE (mode)
+ || VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+ if (MMX_REGNO_P (regno))
+ {
+ /* We implement the move patterns for 3DNOW modes even in MMX mode,
+ so if the register is available at all, then we can move data of
+ the given mode into or out of it. */
+ return (VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+
+ if (mode == QImode)
+ {
+ /* Take care with QImode values: they can be in non-QI regs,
+ but then they cause partial register stalls. */
+ if (regno < 4 || TARGET_64BIT)
+ return 1;
+ if (!TARGET_PARTIAL_REG_STALL)
+ return 1;
+ return reload_in_progress || reload_completed;
+ }
+ /* We handle both integer and floats in the general purpose registers. */
+ else if (VALID_INT_MODE_P (mode))
+ return 1;
+ else if (VALID_FP_MODE_P (mode))
+ return 1;
+ /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
+ on to use that value in smaller contexts, this can easily force a
+ pseudo to be allocated to GENERAL_REGS. Since this is no worse than
+ supporting DImode, allow it. */
+ else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
+ return 1;
+
+ return 0;
+}
+
+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
+ tieable integer mode. */
+
+static bool
+ix86_tieable_integer_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ return true;
+
+ case QImode:
+ return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
+
+ case DImode:
+ /* APPLE LOCAL 5695218 convert int to logical bool */
+ return !!TARGET_64BIT;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if MODE1 is accessible in a register that can hold MODE2
+ without copying. That is, all register classes that can hold MODE2
+ can also hold MODE1. */
+
+bool
+ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ if (mode1 == mode2)
+ return true;
+
+ if (ix86_tieable_integer_mode_p (mode1)
+ && ix86_tieable_integer_mode_p (mode2))
+ return true;
+
+ /* MODE2 being XFmode implies fp stack or general regs, which means we
+ can tie any smaller floating point modes to it. Note that we do not
+ tie this with TFmode. */
+ if (mode2 == XFmode)
+ return mode1 == SFmode || mode1 == DFmode;
+
+ /* MODE2 being DFmode implies fp stack, general or sse regs, which means
+ that we can tie it with SFmode. */
+ if (mode2 == DFmode)
+ return mode1 == SFmode;
+
+ /* If MODE2 is only appropriate for an SSE register, then tie with
+ any other mode acceptable to SSE registers. */
+ if (GET_MODE_SIZE (mode2) >= 8
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+ return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
+
+ /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
+ with any other mode acceptable to MMX registers. */
+ if (GET_MODE_SIZE (mode2) == 8
+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
+ return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
+
+ return false;
+}
+
+/* Return the cost of moving data of mode M between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ If moving between registers and memory is more expensive than
+ between two registers, you should define this macro to express the
+ relative cost.
+
+ Also model the increased cost of moving QImode registers in
+ non-Q_REGS classes.
+ */
+int
+ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
+{
+ if (FLOAT_CLASS_P (class))
+ {
+ int index;
+ switch (mode)
+ {
+ case SFmode:
+ index = 0;
+ break;
+ case DFmode:
+ index = 1;
+ break;
+ case XFmode:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+ }
+ if (SSE_CLASS_P (class))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ case 16:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+ }
+ if (MMX_CLASS_P (class))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ default:
+ return 100;
+ }
+ return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+ }
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ if (in)
+ return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
+ : ix86_cost->movzbl_load);
+ else
+ return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
+ : ix86_cost->int_store[0] + 4);
+ break;
+ case 2:
+ return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
+ default:
+ /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
+ if (mode == TFmode)
+ mode = XFmode;
+ return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
+ * (((int) GET_MODE_SIZE (mode)
+ + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+ }
+}
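+
+/* As an example of the default branch above: on ia32 a DImode load into
+ a general register takes GET_MODE_SIZE (DImode) / UNITS_PER_WORD = 2
+ word-sized moves, so the cost is 2 * int_load[2]. */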
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
+ *total = 3;
+ else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
+ *total = 2;
+ else if (flag_pic && SYMBOLIC_CONST (x)
+ && (!TARGET_64BIT
+ || (GET_CODE (x) != LABEL_REF
+ && (GET_CODE (x) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (x)))))
+ *total = 1;
+ else
+ *total = 0;
+ return true;
+
+ case CONST_DOUBLE:
+ if (mode == VOIDmode)
+ *total = 0;
+ else
+ switch (standard_80387_constant_p (x))
+ {
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default: /* Other constants */
+ *total = 2;
+ break;
+ case 0:
+ case -1:
+ /* Start with (MEM (SYMBOL_REF)), since that's where
+ it'll probably end up. Add a penalty for size. */
+ *total = (COSTS_N_INSNS (1)
+ + (flag_pic != 0 && !TARGET_64BIT)
+ + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
+ break;
+ }
+ return true;
+
+ case ZERO_EXTEND:
+ /* Zero extension is often completely free on x86_64, so make
+ it as cheap as possible. */
+ if (TARGET_64BIT && mode == DImode
+ && GET_MODE (XEXP (x, 0)) == SImode)
+ *total = 1;
+ else if (TARGET_ZERO_EXTEND_WITH_AND)
+ *total = ix86_cost->add;
+ else
+ *total = ix86_cost->movzx;
+ return false;
+
+ case SIGN_EXTEND:
+ *total = ix86_cost->movsx;
+ return false;
+
+ case ASHIFT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+ {
+ HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ if (value == 1)
+ {
+ *total = ix86_cost->add;
+ return false;
+ }
+ if ((value == 2 || value == 3)
+ && ix86_cost->lea <= ix86_cost->shift_const)
+ {
+ *total = ix86_cost->lea;
+ return false;
+ }
+ }
+ /* FALLTHRU */
+
+ case ROTATE:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ if (INTVAL (XEXP (x, 1)) > 32)
+ *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
+ else
+ *total = ix86_cost->shift_const * 2;
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == AND)
+ *total = ix86_cost->shift_var * 2;
+ else
+ *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
+ }
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ *total = ix86_cost->shift_const;
+ else
+ *total = ix86_cost->shift_var;
+ }
+ return false;
+
+ case MULT:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fmul;
+ return false;
+ }
+ else
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ int nbits;
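+ /* Count the set bits of the constant multiplier; each iteration
+ of the loop below clears the lowest set bit. */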
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ for (nbits = 0; value != 0; value &= value - 1)
+ nbits++;
+ }
+ else
+ /* This is arbitrary. */
+ nbits = 7;
+
+ /* Compute costs correctly for widening multiplication. */
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
+ == GET_MODE_SIZE (mode))
+ {
+ int is_mulwiden = 0;
+ enum machine_mode inner_mode = GET_MODE (op0);
+
+ if (GET_CODE (op0) == GET_CODE (op1))
+ is_mulwiden = 1, op1 = XEXP (op1, 0);
+ else if (GET_CODE (op1) == CONST_INT)
+ {
+ if (GET_CODE (op0) == SIGN_EXTEND)
+ is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
+ == INTVAL (op1);
+ else
+ is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
+ }
+
+ if (is_mulwiden)
+ op0 = XEXP (op0, 0), mode = GET_MODE (op0);
+ }
+
+ *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
+ + nbits * ix86_cost->mult_bit
+ + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
+
+ return true;
+ }
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fdiv;
+ else
+ *total = ix86_cost->divide[MODE_INDEX (mode)];
+ return false;
+
+ case PLUS:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fadd;
+ else if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
+ {
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = ix86_cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = ix86_cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ *total = ix86_cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ case MINUS:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fadd;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (!TARGET_64BIT && mode == DImode)
+ {
+ *total = (ix86_cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code)
+ << (GET_MODE (XEXP (x, 0)) != DImode))
+ + (rtx_cost (XEXP (x, 1), outer_code)
+ << (GET_MODE (XEXP (x, 1)) != DImode)));
+ return true;
+ }
+ /* FALLTHRU */
+
+ case NEG:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fchs;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case NOT:
+ if (!TARGET_64BIT && mode == DImode)
+ *total = ix86_cost->add * 2;
+ else
+ *total = ix86_cost->add;
+ return false;
+
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && XEXP (XEXP (x, 0), 1) == const1_rtx
+ && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
+ && XEXP (x, 1) == const0_rtx)
+ {
+ /* This kind of construct is implemented using test[bwl].
+ Treat it as if we had an AND. */
+ *total = (ix86_cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
+ + rtx_cost (const1_rtx, outer_code));
+ return true;
+ }
+ return false;
+
+ case FLOAT_EXTEND:
+ if (!TARGET_SSE_MATH
+ || mode == XFmode
+ || (mode == DFmode && !TARGET_SSE2))
+ /* For standard 80387 constants, raise the cost to prevent
+ compress_float_constant() from generating a load from memory. */
+ switch (standard_80387_constant_p (XEXP (x, 0)))
+ {
+ case -1:
+ case 0:
+ *total = 0;
+ break;
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default:
+ *total = (x86_ext_80387_constants & TUNEMASK
+ || optimize_size
+ ? 1 : 0);
+ }
+ return false;
+
+ case ABS:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fabs;
+ return false;
+
+ case SQRT:
+ if (FLOAT_MODE_P (mode))
+ *total = ix86_cost->fsqrt;
+ return false;
+
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_TP)
+ *total = 0;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+#if TARGET_MACHO
+
+static int current_machopic_label_num;
+
+/* Given a symbol name and its associated stub, write out the
+ definition of the stub. */
+
+void
+machopic_output_stub (FILE *file, const char *symb, const char *stub)
+{
+ unsigned int length;
+ char *binder_name, *symbol_name, lazy_ptr_name[32];
+ int label = ++current_machopic_label_num;
+
+ /* For 64-bit we shouldn't get here. */
+ gcc_assert (!TARGET_64BIT);
+
+ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
+ symb = (*targetm.strip_name_encoding) (symb);
+
+ length = strlen (stub);
+ binder_name = alloca (length + 32);
+ GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
+
+ length = strlen (symb);
+ symbol_name = alloca (length + 32);
+ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
+
+ sprintf (lazy_ptr_name, "L%d$lz", label);
+
+ /* APPLE LOCAL begin deep branch prediction pic-base */
+ /* APPLE LOCAL begin AT&T-style stub 4164563 */
+ /* Choose one of four possible sections for this stub. */
+ if (MACHOPIC_ATT_STUB)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
+ else if (MACHOPIC_PURE)
+ /* APPLE LOCAL end AT&T-style stub 4164563 */
+ {
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
+ else
+ switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
+ }
+ else
+ /* APPLE LOCAL end deep branch prediction pic-base */
+ switch_to_section (darwin_sections[machopic_symbol_stub_section]);
+
+ fprintf (file, "%s:\n", stub);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+
+ /* APPLE LOCAL begin use %ecx in stubs 4146993 */
+ /* APPLE LOCAL begin deep branch prediction pic-base */
+ /* APPLE LOCAL begin AT&T-style stub 4164563 */
+ if (MACHOPIC_ATT_STUB)
+ {
+ fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
+ }
+ else if (MACHOPIC_PURE)
+ /* APPLE LOCAL end AT&T-style stub 4164563 */
+ {
+ /* PIC stub. */
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ {
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
+ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
+ }
+ else
+ {
+ /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
+ fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
+ fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
+ }
+ fprintf (file, "\tjmp\t*%%ecx\n");
+ }
+ else /* 16-byte -mdynamic-no-pic stub. */
+ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
+
+ /* APPLE LOCAL begin AT&T-style stub 4164563 */
+ /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
+ it needs no stub-binding-helper. */
+ if (MACHOPIC_ATT_STUB)
+ return;
+ /* APPLE LOCAL end AT&T-style stub 4164563 */
+
+ /* The "stub_binding_helper" is a fragment that gets executed only
+ once, the first time this stub is invoked (then it becomes "dead
+ code"). It asks the dynamic linker to set the
+ lazy_symbol_pointer to point at the function we want
+ (e.g. printf) so that subsequent invocations of this stub go
+ directly to that dynamically-linked callee. Other UN*X systems
+ use similar stubs, but those are generated by the static linker
+ and never appear in assembly files. */
+ /* APPLE LOCAL end deep branch prediction pic-base */
+ fprintf (file, "%s:\n", binder_name);
+
+ /* APPLE LOCAL begin deep branch prediction pic-base * tabify insns */
+ if (MACHOPIC_PURE)
+ {
+ fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
+ fprintf (file, "\tpushl\t%%ecx\n");
+ }
+ else
+ fprintf (file, "\t pushl\t$%s\n", lazy_ptr_name);
+
+ fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
+ /* APPLE LOCAL end deep branch prediction pic-base * tabify insns */
+ /* APPLE LOCAL end use %ecx in stubs 4146993 */
+
+ /* APPLE LOCAL begin deep branch prediction pic-base. */
+ /* N.B. Keep the correspondence of these
+ 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
+ old-pic/new-pic/non-pic stubs; altering this will break
+ compatibility with existing dylibs. */
+ if (MACHOPIC_PURE)
+ {
+ /* PIC stubs. */
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
+ else
+ /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ }
+ else
+ /* 16-byte -mdynamic-no-pic stub. */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
+
+ fprintf (file, "%s:\n", lazy_ptr_name);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+ fprintf (file, "\t.long\t%s\n", binder_name);
+}
+/* APPLE LOCAL end deep branch prediction pic-base */
+
+void
+darwin_x86_file_end (void)
+{
+ darwin_file_end ();
+ ix86_file_end ();
+}
+
+/* APPLE LOCAL begin 4457939 stack alignment mishandled */
+void
+ix86_darwin_init_expanders (void)
+{
+ /* <rdar://problem/4471596> stack alignment is not handled properly
+
+ Please remove this entire function when addressing this
+ Radar. Please be sure to delete the definition of INIT_EXPANDERS
+ in i386/darwin.h as well. */
+ /* Darwin/x86_32 stack pointer will be 16-byte aligned at every
+ CALL, but the frame pointer, when used, will be 8-bytes offset
+ from a 16-byte alignment (the size of the return address and the
+ saved frame pointer). */
+ if (cfun && cfun->emit
+ && cfun->emit->regno_pointer_align)
+ {
+ REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = STACK_BOUNDARY;
+ REGNO_POINTER_ALIGN (FRAME_POINTER_REGNUM) = BITS_PER_WORD;
+ REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_WORD;
+ REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = BITS_PER_WORD;
+ }
+}
+/* APPLE LOCAL end 4457939 stack alignment mishandled */
+#endif /* TARGET_MACHO */
+
+/* Order the registers for register allocator. */
+
+void
+x86_order_regs_for_local_alloc (void)
+{
+ int pos = 0;
+ int i;
+
+ /* First allocate the local general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* Global general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && !call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers come first in case we are doing FP math
+ using them. */
+ if (!TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* SSE registers. */
+ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers. */
+ if (TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* Initialize the rest of the array, as we do not allocate some
+ registers at all. */
+ while (pos < FIRST_PSEUDO_REGISTER)
+ reg_alloc_order [pos++] = 0;
+}
+
+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_struct_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree *type = NULL;
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ type = &TREE_TYPE (*node);
+ }
+ else
+ type = node;
+
+ if (!(type && (TREE_CODE (*type) == RECORD_TYPE
+ || TREE_CODE (*type) == UNION_TYPE)))
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ else if ((is_attribute_p ("ms_struct", name)
+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
+ || ((is_attribute_p ("gcc_struct", name)
+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
+ {
+ warning (OPT_Wattributes, "%qs incompatible attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static bool
+ix86_ms_bitfield_layout_p (tree record_type)
+{
+ return (TARGET_MS_BITFIELD_LAYOUT &&
+ !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
+}
+
+/* Returns an expression indicating where the this parameter is
+ located on entry to the FUNCTION. */
+
+static rtx
+x86_this_parameter (tree function)
+{
+ tree type = TREE_TYPE (function);
+
+ if (TARGET_64BIT)
+ {
+ int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
+ return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
+ }
+
+ if (ix86_function_regparm (type, function) > 0)
+ {
+ tree parm;
+
+ parm = TYPE_ARG_TYPES (type);
+ /* Figure out whether or not the function has a variable number of
+ arguments. */
+ for (; parm; parm = TREE_CHAIN (parm))
+ if (TREE_VALUE (parm) == void_type_node)
+ break;
+ /* If not variadic, the this parameter is passed in the first
+ argument register. */
+ if (parm)
+ {
+ int regno = 0;
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ regno = 2;
+ return gen_rtx_REG (SImode, regno);
+ }
+ }
+
+ if (aggregate_value_p (TREE_TYPE (type), type))
+ return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
+ else
+ return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
+}
+
+/* Determine whether x86_output_mi_thunk can succeed. */
+
+static bool
+x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ /* 64-bit can handle anything. */
+ if (TARGET_64BIT)
+ return true;
+
+ /* For 32-bit, everything's fine if we have one free register. */
+ if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
+ return true;
+
+ /* Need a free register for vcall_offset. */
+ if (vcall_offset)
+ return false;
+
+ /* Need a free register for GOT references. */
+ if (flag_pic && !(*targetm.binds_local_p) (function))
+ return false;
+
+ /* Otherwise ok. */
+ return true;
+}
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
+
+static void
+x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
+ tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ rtx xops[3];
+ rtx this = x86_this_parameter (function);
+ rtx this_reg, tmp;
+
+ /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
+ pull it in now and let DELTA benefit. */
+ if (REG_P (this))
+ this_reg = this;
+ else if (vcall_offset)
+ {
+ /* Put the this parameter into %eax. */
+ xops[0] = this;
+ xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ this_reg = NULL_RTX;
+
+ /* Adjust the this parameter by a fixed constant. */
+ if (delta)
+ {
+ xops[0] = GEN_INT (delta);
+ xops[1] = this_reg ? this_reg : this;
+ if (TARGET_64BIT)
+ {
+ if (!x86_64_general_operand (xops[0], DImode))
+ {
+ tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
+ xops[1] = tmp;
+ output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
+ xops[0] = tmp;
+ xops[1] = this;
+ }
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* Adjust the this parameter by a value stored in the vtable. */
+ if (vcall_offset)
+ {
+ if (TARGET_64BIT)
+ tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
+ else
+ {
+ int tmp_regno = 2 /* ECX */;
+ if (lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ tmp_regno = 0 /* EAX */;
+ tmp = gen_rtx_REG (SImode, tmp_regno);
+ }
+
+ xops[0] = gen_rtx_MEM (Pmode, this_reg);
+ xops[1] = tmp;
+ if (TARGET_64BIT)
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+
+ /* Adjust the this parameter. */
+ xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
+ if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
+ {
+ rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
+ xops[0] = GEN_INT (vcall_offset);
+ xops[1] = tmp2;
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
+ }
+ xops[1] = this_reg;
+ if (TARGET_64BIT)
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* If necessary, drop THIS back to its stack slot. */
+ if (this_reg && this_reg != this)
+ {
+ xops[0] = this_reg;
+ xops[1] = this;
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ xops[0] = XEXP (DECL_RTL (function), 0);
+ if (TARGET_64BIT)
+ {
+ if (!flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ else
+ {
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
+ tmp = gen_rtx_CONST (Pmode, tmp);
+ tmp = gen_rtx_MEM (QImode, tmp);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%A0", xops);
+ }
+ }
+ else
+ {
+ if (!flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ else
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ rtx sym_ref = XEXP (DECL_RTL (function), 0);
+ /* APPLE LOCAL begin axe stubs 5571540 */
+ if (darwin_stubs)
+ sym_ref = (gen_rtx_SYMBOL_REF
+ (Pmode,
+ machopic_indirection_name (sym_ref, /*stub_p=*/true)));
+ tmp = gen_rtx_MEM (QImode, sym_ref);
+ /* APPLE LOCAL end axe stubs 5571540 */
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%0", xops);
+ }
+ else
+#endif /* TARGET_MACHO */
+ {
+ tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX);
+
+ xops[1] = tmp;
+ output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
+ output_asm_insn ("jmp\t{*}%1", xops);
+ }
+ }
+}
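+
+/* A sketch of the output in a simple case: on a 32-bit non-PIC target
+ with THIS on the stack, DELTA == 4 and no VCALL_OFFSET, the thunk
+ reduces to roughly
+ addl $4, 4(%esp)
+ jmp target
+ where "target" stands for the callee's assembler name. */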
+
+static void
+x86_file_start (void)
+{
+ default_file_start ();
+#if TARGET_MACHO
+ darwin_file_start ();
+#endif
+ if (X86_FILE_START_VERSION_DIRECTIVE)
+ fputs ("\t.version\t\"01.01\"\n", asm_out_file);
+ if (X86_FILE_START_FLTUSED)
+ fputs ("\t.global\t__fltused\n", asm_out_file);
+ if (ix86_asm_dialect == ASM_INTEL)
+ fputs ("\t.intel_syntax\n", asm_out_file);
+}
+
+int
+x86_field_alignment (tree field, int computed)
+{
+ enum machine_mode mode;
+ tree type = TREE_TYPE (field);
+
+ if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
+ return computed;
+ /* APPLE LOCAL begin mac68k alignment */
+#if TARGET_MACHO
+ if (OPTION_ALIGN_MAC68K)
+ {
+ if (computed >= 128)
+ return computed;
+ return MIN (computed, 16);
+ }
+#endif
+ /* APPLE LOCAL end mac68k alignment */
+ mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
+ ? get_inner_array_type (type) : type);
+ if (mode == DFmode || mode == DCmode
+ || GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
+ return MIN (32, computed);
+ return computed;
+}
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+void
+x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ if (TARGET_64BIT)
+ if (flag_pic)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
+#endif
+ fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
+ }
+ else
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
+#endif
+ fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+ }
+ else if (flag_pic)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
+ LPREFIX, labelno, PROFILE_COUNT_REGISTER);
+#endif
+ fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
+ }
+ else
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
+ PROFILE_COUNT_REGISTER);
+#endif
+ fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+ }
+}
+
+/* We don't have exact information about the insn sizes, but we may assume
+ quite safely that we know about all 1-byte insns and memory address
+ sizes. This is enough to eliminate unnecessary padding in 99% of
+ cases. */
+
+static int
+min_insn_size (rtx insn)
+{
+ int l = 0;
+
+ if (!INSN_P (insn) || !active_insn_p (insn))
+ return 0;
+
+ /* Discard alignments we've emitted, as well as jump instructions. */
+ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+ return 0;
+ if (GET_CODE (insn) == JUMP_INSN
+ && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+ return 0;
+
+ /* Important case - calls are always 5 bytes.
+ It is common to have many calls in a row. */
+ if (GET_CODE (insn) == CALL_INSN
+ && symbolic_reference_mentioned_p (PATTERN (insn))
+ && !SIBLING_CALL_P (insn))
+ return 5;
+ if (get_attr_length (insn) <= 1)
+ return 1;
+
+  /* For normal instructions we may rely on the address sizes and the
+     presence of a symbol to require 4 bytes of encoding.  This is not
+     the case for jumps, where references are PC-relative.  */
+ if (GET_CODE (insn) != JUMP_INSN)
+ {
+ l = get_attr_length_address (insn);
+ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+ l = 4;
+ }
+ if (l)
+ return 1+l;
+ else
+ return 2;
+}
+
+/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in
+   a 16-byte window.  */
+
+static void
+ix86_avoid_jump_misspredicts (void)
+{
+ rtx insn, start = get_insns ();
+ int nbytes = 0, njumps = 0;
+ int isjump = 0;
+
+  /* Look for all minimal intervals of instructions containing 4 jumps.
+     The intervals are bounded by START and INSN.  NBYTES is the total
+     size of the instructions in the interval, including INSN but not
+     including START.  When NBYTES is smaller than 16 bytes, it is
+     possible that the ends of START and INSN fall into the same
+     16-byte window.
+
+     The smallest offset in the window at which INSN can start is the
+     case where START ends at offset 0.  The offset of INSN is then
+     NBYTES - sizeof (INSN).  We add a p2align to a 16-byte window with
+     maxskip 17 - NBYTES + sizeof (INSN).  */
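+  /* Worked example (illustrative numbers): if the interval already
+     contains three jumps and NBYTES is 12 with a 2-byte INSN, the loop
+     below emits an alignment of padsize = 15 - 12 + 2 = 5 bytes, which
+     pushes INSN far enough ahead that all four jumps cannot share one
+     16-byte window.  */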
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+
+ nbytes += min_insn_size (insn);
+ if (dump_file)
+	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
+ INSN_UID (insn), min_insn_size (insn));
+ if ((GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ || GET_CODE (insn) == CALL_INSN)
+ njumps++;
+ else
+ continue;
+
+ while (njumps > 3)
+ {
+ start = NEXT_INSN (start);
+ if ((GET_CODE (start) == JUMP_INSN
+ && GET_CODE (PATTERN (start)) != ADDR_VEC
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+ || GET_CODE (start) == CALL_INSN)
+ njumps--, isjump = 1;
+ else
+ isjump = 0;
+ nbytes -= min_insn_size (start);
+ }
+ gcc_assert (njumps >= 0);
+ if (dump_file)
+ fprintf (dump_file, "Interval %i to %i has %i bytes\n",
+ INSN_UID (start), INSN_UID (insn), nbytes);
+
+ if (njumps == 3 && isjump && nbytes < 16)
+ {
+ int padsize = 15 - nbytes + min_insn_size (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Padding insn %i by %i bytes!\n",
+ INSN_UID (insn), padsize);
+ emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+ }
+ }
+}
+
+/* The AMD Athlon works faster when a RET is not the destination of a
+   conditional jump and is not directly preceded by another jump
+   instruction.  We avoid the penalty by inserting a NOP just before
+   such RET instructions.  */
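+/* Implementation note (a sketch of the effect, assuming the usual
+   "return_internal_long" pattern in i386.md): the bare "ret" is deleted
+   and re-emitted in its long form, typically "rep ; ret", which costs
+   one prefix byte but avoids the mispredict described above.  */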
+static void
+ix86_pad_returns (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ basic_block bb = e->src;
+ rtx ret = BB_END (bb);
+ rtx prev;
+ bool replace = false;
+
+ if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
+ || !maybe_hot_bb_p (bb))
+ continue;
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+ if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
+ break;
+ if (prev && GET_CODE (prev) == CODE_LABEL)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (EDGE_FREQUENCY (e) && e->src->index >= 0
+ && !(e->flags & EDGE_FALLTHRU))
+ replace = true;
+ }
+ if (!replace)
+ {
+ prev = prev_active_insn (ret);
+ if (prev
+ && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
+ || GET_CODE (prev) == CALL_INSN))
+ replace = true;
+	  /* Empty functions get a branch mispredict even when the jump
+	     destination is not visible to us.  */
+ if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ replace = true;
+ }
+ if (replace)
+ {
+ emit_insn_before (gen_return_internal_long (), ret);
+ delete_insn (ret);
+ }
+ }
+}
+
+/* Implement machine-specific optimizations: padding of returns for K8
+   CPUs, and a pass that avoids placing 4 jumps in a single 16-byte
+   window.  */
+static void
+ix86_reorg (void)
+{
+ if (TARGET_PAD_RETURNS && optimize && !optimize_size)
+ ix86_pad_returns ();
+ if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
+ ix86_avoid_jump_misspredicts ();
+}
+
+/* Return nonzero when a QImode register that must be represented via a
+   REX prefix is used.  */
+bool
+x86_extended_QIreg_mentioned_p (rtx insn)
+{
+ int i;
+ extract_insn_cached (insn);
+ for (i = 0; i < recog_data.n_operands; i++)
+ if (REG_P (recog_data.operand[i])
+ && REGNO (recog_data.operand[i]) >= 4)
+ return true;
+ return false;
+}
+
+/* Return nonzero when P points to a register encoded via a REX prefix.
+   Called via for_each_rtx.  */
+static int
+extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
+{
+ unsigned int regno;
+ if (!REG_P (*p))
+ return 0;
+ regno = REGNO (*p);
+ return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
+}
+
+/* Return true when INSN mentions a register that must be encoded using
+   a REX prefix.  */
+bool
+x86_extended_reg_mentioned_p (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
+}
+
+/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
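+/* Sketch of the generic path below: a value whose sign bit is set cannot
+   be converted directly by the signed FLOAT operation, so it is halved
+   with round-to-odd, i0 = (in >> 1) | (in & 1), converted, and then
+   doubled.  Keeping the low bit via the IOR preserves correct rounding
+   whenever the destination format has fewer mantissa bits than the
+   source has value bits (e.g. DImode -> DFmode).  */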
+
+void
+x86_emit_floatuns (rtx operands[2])
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+ enum machine_mode mode, inmode;
+
+ inmode = GET_MODE (operands[1]);
+ /* APPLE LOCAL begin 4176531 4424891 */
+ mode = GET_MODE (operands[0]);
+ if (!TARGET_64BIT && mode == DFmode && !optimize_size)
+ {
+ switch (inmode)
+ {
+ case SImode:
+ ix86_expand_convert_uns_SI2DF_sse (operands);
+ break;
+ case DImode:
+ ix86_expand_convert_uns_DI2DF_sse (operands);
+ break;
+ default:
+ abort ();
+ break;
+ }
+ return;
+ }
+ /* APPLE LOCAL end 4176531 4424891 */
+
+ out = operands[0];
+ in = force_reg (inmode, operands[1]);
+ /* APPLE LOCAL begin one line deletion 4424891 */
+ /* APPLE LOCAL end one line deletion 4424891 */
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ i1 = gen_reg_rtx (Pmode);
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
+ i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
+ i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
+ i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+ expand_float (f0, i0, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ with all elements equal to VAR. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx val)
+{
+ enum machine_mode smode, wsmode, wvmode;
+ rtx x;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ case V4SFmode:
+ case V4SImode:
+ val = force_reg (GET_MODE_INNER (mode), val);
+ x = gen_rtx_VEC_DUPLICATE (mode, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4HImode:
+ if (!mmx_ok)
+ return false;
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ {
+ val = gen_lowpart (SImode, val);
+ x = gen_rtx_TRUNCATE (HImode, val);
+ x = gen_rtx_VEC_DUPLICATE (mode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+ }
+ else
+ {
+ smode = HImode;
+ wsmode = SImode;
+ wvmode = V2SImode;
+ goto widen;
+ }
+
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ smode = QImode;
+ wsmode = HImode;
+ wvmode = V4HImode;
+ goto widen;
+ case V8HImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend HImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V8HImode vector. */
+ tmp1 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
+ /* Duplicate the low short through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
+ /* Cast the V8HImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
+ emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
+ return true;
+ }
+ smode = HImode;
+ wsmode = SImode;
+ wvmode = V4SImode;
+ goto widen;
+ case V16QImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend QImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V16QImode vector. */
+ tmp1 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
+ /* Duplicate the low byte through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ /* Cast the V16QImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
+ emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
+ return true;
+ }
+ smode = QImode;
+ wsmode = HImode;
+ wvmode = V8HImode;
+ goto widen;
+ widen:
+ /* Replicate the value once into the next wider mode and recurse. */
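+      /* E.g. for the QImode value 0xAB this shifts and IORs to form the
+	 HImode value 0xABAB, builds a duplicate of that in the wider
+	 vector mode, and reinterprets the result in the original mode.  */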
+ val = convert_modes (wsmode, smode, val, true);
+ x = expand_simple_binop (wsmode, ASHIFT, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+ x = gen_reg_rtx (wvmode);
+ if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
+ gcc_unreachable ();
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+   whose ONE_VAR element is VAR and whose other elements are zero.
+   Return true if successful.  */
+
+static bool
+ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx var, int one_var)
+{
+ enum machine_mode vsimode;
+ rtx new_target;
+ rtx x, tmp;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ if (one_var != 0)
+ return false;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4SFmode:
+ case V4SImode:
+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ new_target = gen_reg_rtx (mode);
+ else
+ new_target = target;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_DUPLICATE (mode, var);
+ x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
+ if (one_var != 0)
+ {
+ /* We need to shuffle the value to the correct position, so
+ create a new pseudo to store the intermediate result. */
+
+ /* With SSE2, we can use the integer shuffle insns. */
+ if (mode != V4SFmode && TARGET_SSE2)
+ {
+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0 : 1),
+ GEN_INT (one_var == 3 ? 0 : 1)));
+ if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+ }
+
+ /* Otherwise convert the intermediate result to V4SFmode and
+ use the SSE1 shuffle instructions. */
+ if (mode != V4SFmode)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
+ }
+ else
+ tmp = new_target;
+
+ emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0+4 : 1+4),
+ GEN_INT (one_var == 3 ? 0+4 : 1+4)));
+
+ if (mode != V4SFmode)
+ emit_move_insn (target, gen_lowpart (V4SImode, tmp));
+ else if (tmp != target)
+ emit_move_insn (target, tmp);
+ }
+ else if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+
+ case V8HImode:
+ case V16QImode:
+ vsimode = V4SImode;
+ goto widen;
+ case V4HImode:
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ vsimode = V2SImode;
+ goto widen;
+ widen:
+ if (one_var != 0)
+ return false;
+
+ /* Zero extend the variable element to SImode and recurse. */
+ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
+
+ x = gen_reg_rtx (vsimode);
+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
+ var, one_var))
+ gcc_unreachable ();
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ consisting of the values in VALS. It is known that all elements
+ except ONE_VAR are constants. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals, int one_var)
+{
+ rtx var = XVECEXP (vals, 0, one_var);
+ enum machine_mode wmode;
+ rtx const_vec, x;
+
+ const_vec = copy_rtx (vals);
+ XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
+
+ switch (mode)
+ {
+ case V2DFmode:
+ case V2DImode:
+ case V2SFmode:
+ case V2SImode:
+ /* For the two element vectors, it's just as easy to use
+ the general case. */
+ return false;
+
+ case V4SFmode:
+ case V4SImode:
+ case V8HImode:
+ case V4HImode:
+ break;
+
+ case V16QImode:
+ wmode = V8HImode;
+ goto widen;
+ case V8QImode:
+ wmode = V4HImode;
+ goto widen;
+ widen:
+ /* There's no way to set one QImode entry easily. Combine
+ the variable value with its adjacent constant value, and
+ promote to an HImode set. */
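+      /* E.g. to set byte 5 of a V16QImode vector: byte 5 is paired with
+	 constant byte 4 (5 ^ 1); since 5 is odd, the variable value is
+	 shifted left by 8 and IORed with the constant's low byte, and
+	 the combined HImode value is stored as element 5 >> 1 == 2 of
+	 the V8HImode view of the vector.  */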
+ x = XVECEXP (vals, 0, one_var ^ 1);
+ if (one_var & 1)
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ x = GEN_INT (INTVAL (x) & 0xff);
+ }
+ else
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ x = gen_int_mode (INTVAL (x) << 8, HImode);
+ }
+ if (x != const0_rtx)
+ var = expand_simple_binop (HImode, IOR, var, x, var,
+ 1, OPTAB_LIB_WIDEN);
+
+ x = gen_reg_rtx (wmode);
+ emit_move_insn (x, gen_lowpart (wmode, const_vec));
+ ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+
+ emit_move_insn (target, const_vec);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+}
+
+/* A subroutine of ix86_expand_vector_init. Handle the most general case:
+ all values variable, and none identical. */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals)
+{
+ enum machine_mode half_mode = GET_MODE_INNER (mode);
+ rtx op0 = NULL, op1 = NULL;
+ bool use_vec_concat = false;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok && !TARGET_SSE)
+ break;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ /* For the two element vectors, we always implement VEC_CONCAT. */
+ op0 = XVECEXP (vals, 0, 0);
+ op1 = XVECEXP (vals, 0, 1);
+ use_vec_concat = true;
+ break;
+
+ case V4SFmode:
+ half_mode = V2SFmode;
+ goto half;
+ case V4SImode:
+ half_mode = V2SImode;
+ goto half;
+ half:
+ {
+ rtvec v;
+
+ /* For V4SF and V4SI, we implement a concat of two V2 vectors.
+ Recurse to load the two halves. */
+
+ op0 = gen_reg_rtx (half_mode);
+ v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
+ ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+
+ op1 = gen_reg_rtx (half_mode);
+ v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
+ ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+
+ use_vec_concat = true;
+ }
+ break;
+
+ case V8HImode:
+ case V16QImode:
+ case V4HImode:
+ case V8QImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (use_vec_concat)
+ {
+ if (!register_operand (op0, half_mode))
+ op0 = force_reg (half_mode, op0);
+ if (!register_operand (op1, half_mode))
+ op1 = force_reg (half_mode, op1);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op0, op1)));
+ }
+ else
+ {
+ int i, j, n_elts, n_words, n_elt_per_word;
+ enum machine_mode inner_mode;
+ rtx words[4], shift;
+
+ inner_mode = GET_MODE_INNER (mode);
+ n_elts = GET_MODE_NUNITS (mode);
+ n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+ n_elt_per_word = n_elts / n_words;
+ shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
+
+ for (i = 0; i < n_words; ++i)
+ {
+ rtx word = NULL_RTX;
+
+ for (j = 0; j < n_elt_per_word; ++j)
+ {
+ rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
+ elt = convert_modes (word_mode, inner_mode, elt, true);
+
+ if (j == 0)
+ word = elt;
+ else
+ {
+ word = expand_simple_binop (word_mode, ASHIFT, word, shift,
+ word, 1, OPTAB_LIB_WIDEN);
+ word = expand_simple_binop (word_mode, IOR, word, elt,
+ word, 1, OPTAB_LIB_WIDEN);
+ }
+ }
+
+ words[i] = word;
+ }
+
+ if (n_words == 1)
+ emit_move_insn (target, gen_lowpart (mode, words[0]));
+ else if (n_words == 2)
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
+ emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
+ emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
+ emit_move_insn (target, tmp);
+ }
+ else if (n_words == 4)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
+ ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
+ emit_move_insn (target, gen_lowpart (mode, tmp));
+ }
+ else
+ gcc_unreachable ();
+ }
+}
+
+/* Initialize vector TARGET via VALS. Suppress the use of MMX
+ instructions unless MMX_OK is true. */
+
+void
+ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true, all_const_zero = true;
+ int i;
+ rtx x;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ n_var++, one_var = i;
+ else if (x != CONST0_RTX (inner_mode))
+ all_const_zero = false;
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ /* Constants are best loaded from the constant pool. */
+ if (n_var == 0)
+ {
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ return;
+ }
+
+ /* If all values are identical, broadcast the value. */
+ if (all_same
+ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
+ XVECEXP (vals, 0, 0)))
+ return;
+
+ /* Values where only one field is non-constant are best loaded from
+ the pool and overwritten via move later. */
+ if (n_var == 1)
+ {
+ if (all_const_zero
+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
+ XVECEXP (vals, 0, one_var),
+ one_var))
+ return;
+
+ if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
+ return;
+ }
+
+ ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
+}
+
+void
+ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ bool use_vec_merge = false;
+ rtx tmp;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (mmx_ok)
+ {
+ tmp = gen_reg_rtx (GET_MODE_INNER (mode));
+ ix86_expand_vector_extract (true, tmp, target, 1 - elt);
+ if (elt == 0)
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ return;
+ }
+ break;
+
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ case V2DImode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ case V2DFmode:
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ {
+ rtx op0, op1;
+
+ /* For the two element vectors, we implement a VEC_CONCAT with
+ the extraction of the other element. */
+
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
+
+ if (elt == 0)
+ op0 = val, op1 = tmp;
+ else
+ op0 = tmp, op1 = val;
+
+ tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ return;
+
+ case V4SFmode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ switch (elt)
+ {
+ case 0:
+ use_vec_merge = true;
+ break;
+
+ case 1:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* target = A A B B */
+ emit_insn (gen_sse_unpcklps (target, target, target));
+ /* target = X A B B */
+ ix86_expand_vector_set (false, target, val, 0);
+ /* target = A X C D */
+ emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ GEN_INT (1), GEN_INT (0),
+ GEN_INT (2+4), GEN_INT (3+4)));
+ return;
+
+ case 2:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
+ emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ GEN_INT (0), GEN_INT (1),
+ GEN_INT (0+4), GEN_INT (3+4)));
+ return;
+
+ case 3:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+	  /* target = A B C X */
+ emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ GEN_INT (0), GEN_INT (1),
+ GEN_INT (2+4), GEN_INT (0+4)));
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case V4SImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ /* Element 0 handled by vec_merge below. */
+ if (elt == 0)
+ {
+ use_vec_merge = true;
+ break;
+ }
+
+ if (TARGET_SSE2)
+ {
+ /* With SSE2, use integer shuffles to swap element 0 and ELT,
+ store into element 0, then shuffle them back. */
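+	  /* E.g. for ELT == 2 the order below is {2, 1, 0, 3}: the first
+	     pshufd swaps elements 0 and 2, we store into element 0, and
+	     the same pshufd (a self-inverse transposition) swaps them
+	     back.  */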
+
+ rtx order[4];
+
+ order[0] = GEN_INT (elt);
+ order[1] = const1_rtx;
+ order[2] = const2_rtx;
+ order[3] = GEN_INT (3);
+ order[elt] = const0_rtx;
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+
+ ix86_expand_vector_set (false, target, val, 0);
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
+ gen_lowpart (SFmode, val), elt);
+ }
+ return;
+
+ case V8HImode:
+ use_vec_merge = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_merge = TARGET_SSE4_1;
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ case V8QImode:
+ default:
+ break;
+ }
+
+ if (use_vec_merge)
+ {
+ tmp = gen_rtx_VEC_DUPLICATE (mode, val);
+ tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, target);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (tmp, val);
+
+ emit_move_insn (target, mem);
+ }
+}
+
+void
+ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
+{
+ enum machine_mode mode = GET_MODE (vec);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ bool use_vec_extr = false;
+ rtx tmp;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ break;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ use_vec_extr = true;
+ break;
+
+ case V4SFmode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt+4), GEN_INT (elt+4)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_unpckhps (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ break;
+
+ case V4SImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ if (TARGET_SSE2)
+ {
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_pshufd_1 (tmp, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt), GEN_INT (elt)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
+ gen_lowpart (V4SFmode, vec), elt);
+ return;
+ }
+ break;
+
+ case V8HImode:
+ use_vec_extr = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ /* APPLE LOCAL begin 5612787 mainline sse4 */
+ use_vec_extr = TARGET_SSE4_1;
+ break;
+ /* APPLE LOCAL end 5612787 mainline sse4 */
+ case V8QImode:
+ /* ??? Could extract the appropriate HImode element and shift. */
+ default:
+ break;
+ }
+
+ if (use_vec_extr)
+ {
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
+
+ /* Let the rtl optimizers know about the zero extension performed. */
+ /* APPLE LOCAL 5612787 mainline sse4 */
+ if (inner_mode == QImode || inner_mode == HImode)
+ {
+ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
+ target = gen_lowpart (SImode, target);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, vec);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (target, tmp);
+ }
+}
+
+/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
+ pattern to reduce; DEST is the destination; IN is the input vector. */
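+/* For example, with FN an addition pattern and IN = {a, b, c, d}:
+   movhlps gives {c, d, c, d}; the first FN gives {a+c, b+d, ...};
+   the shufps broadcasts element 1, giving {b+d, b+d, b+d, b+d}; and
+   the final FN leaves a+b+c+d in element 0 of DEST.  Only element 0
+   of DEST is meaningful.  */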
+
+void
+ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+{
+ rtx tmp1, tmp2, tmp3;
+
+ tmp1 = gen_reg_rtx (V4SFmode);
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse_movhlps (tmp1, in, in));
+ emit_insn (fn (tmp2, tmp1, in));
+
+ emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
+ GEN_INT (1), GEN_INT (1),
+ GEN_INT (1+4), GEN_INT (1+4)));
+ emit_insn (fn (dest, tmp2, tmp3));
+}
+
+/* Target hook for scalar_mode_supported_p. */
+static bool
+ix86_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return true;
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+static bool
+ix86_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+ return true;
+ if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
+ return true;
+ if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+ return true;
+ if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+ return true;
+ return false;
+}
+
+/* Worker function for TARGET_MD_ASM_CLOBBERS.
+
+ We do this in the new i386 backend to maintain source compatibility
+ with the old cc0-based compiler. */
+
+static tree
+ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
+ tree inputs ATTRIBUTE_UNUSED,
+ tree clobbers)
+{
+ clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
+ clobbers);
+ clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
+ clobbers);
+ clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
+ clobbers);
+ return clobbers;
+}
+
+/* Return true if this goes in large data/bss.  */
+
+static bool
+ix86_in_large_data_p (tree exp)
+{
+ if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
+ return false;
+
+ /* Functions are never large data. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+ if (strcmp (section, ".ldata") == 0
+ || strcmp (section, ".lbss") == 0)
+ return true;
+ return false;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in data because it might be too big when completed. */
+ if (!size || size > ix86_section_threshold)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+ix86_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == VAR_DECL
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ && ix86_in_large_data_p (decl))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
+}
+
+/* Worker function for REVERSE_CONDITION. */
+
+enum rtx_code
+ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
+{
+ return (mode != CCFPmode && mode != CCFPUmode
+ ? reverse_condition (code)
+ : reverse_condition_maybe_unordered (code));
+}
+
+/* Output code to perform an x87 FP register move, from OPERANDS[1]
+ to OPERANDS[0]. */
+
+const char *
+output_387_reg_move (rtx insn, rtx *operands)
+{
+ if (REG_P (operands[1])
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG)
+ return output_387_ffreep (operands, 0);
+ return "fstp\t%y0";
+ }
+ if (STACK_TOP_P (operands[0]))
+ return "fld%z1\t%y1";
+ return "fst\t%y0";
+}
+
+/* Output code to perform a conditional jump to LABEL if the C2 flag in
+   the FP status register is set.  */
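+/* Implementation note: after fnstsw, C2 is bit 10 of the FP status
+   word, i.e. mask 0x04 in the high byte tested below.  With SAHF the
+   high byte is loaded into EFLAGS, where C2 maps to PF, which is what
+   the UNORDERED test inspects.  */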
+
+void
+ix86_emit_fp_unordered_jump (rtx label)
+{
+ rtx reg = gen_reg_rtx (HImode);
+ rtx temp;
+
+ emit_insn (gen_x86_fnstsw_1 (reg));
+
+ if (TARGET_USE_SAHF)
+ {
+ emit_insn (gen_x86_sahf_1 (reg));
+
+ temp = gen_rtx_REG (CCmode, FLAGS_REG);
+ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
+
+ temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
+ }
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+ emit_jump_insn (temp);
+}
+
+/* Output code to perform a log1p XFmode calculation. */
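+/* The comparison constant below is 1 - sqrt(2)/2.  The x87 fyl2xp1
+   instruction is specified only for |x| within that bound, where
+   computing log2(1+x) directly avoids the cancellation error of
+   forming 1+x first; outside the bound fyl2x on 1+x is safe.  Both
+   results are scaled by ln(2) (the fldln2 constant), so
+   op0 = ln(2) * log2(...) = log1p(op1).  */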
+
+void
+ix86_emit_i387_log1p (rtx op0, rtx op1)
+{
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+
+ rtx tmp = gen_reg_rtx (XFmode);
+ rtx tmp2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_absxf2 (tmp, op1));
+ emit_insn (gen_cmpxf (tmp,
+ CONST_DOUBLE_FROM_REAL_VALUE (
+ REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
+ XFmode)));
+ emit_jump_insn (gen_bge (label1));
+
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_move_insn (tmp, CONST1_RTX (XFmode));
+ emit_insn (gen_addxf3 (tmp, op1, tmp));
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
+
+ emit_label (label2);
+}
+
+/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
+
+static void
+i386_solaris_elf_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ /* With Binutils 2.15, the "@unwind" marker must be specified on
+ every occurrence of the ".eh_frame" section, not just the first
+ one. */
+ if (TARGET_64BIT
+ && strcmp (name, ".eh_frame") == 0)
+ {
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
+ flags & SECTION_WRITE ? "aw" : "a");
+ return;
+ }
+ default_elf_asm_named_section (name, flags, decl);
+}
+
+/* APPLE LOCAL begin regparmandstackparm */
+
+/* Mark this fndecl as using the regparmandstackparm calling convention. */
+static void
+ix86_make_regparmandstackparmee (tree *pt)
+{
+ decl_attributes (pt,
+ tree_cons (get_identifier ("regparmandstackparmee"),
+ NULL_TREE, TYPE_ATTRIBUTES (*pt)), 0);
+}
+
+/* Look up fndecls marked 'regparmandstackparm' and retrieve their $3SSE equivalents.  */
+static splay_tree ix86_darwin_regparmandstackparm_st;
+/* Cache for regparmandstackparm fntypes. */
+static splay_tree ix86_darwin_fntype_st;
+
+/* Append "$3SSE" to an ID, returning a new IDENTIFIER_NODE. */
+static tree
+ix86_darwin_regparmandstackparm_mangle_name (tree id)
+{
+ static const char *mangle_suffix = "$3SSE";
+ unsigned int mangle_length = strlen (mangle_suffix);
+ const char *name;
+ unsigned int orig_length;
+ char *buf;
+
+ if (!id)
+ return NULL_TREE;
+
+ name = IDENTIFIER_POINTER (id);
+ orig_length = strlen (name);
+ buf = alloca (orig_length + mangle_length + 1);
+
+ strcpy (buf, name);
+ strcat (buf, mangle_suffix);
+  return get_identifier (buf); /* get_identifier copies the string, so the alloca'd buffer can die.  */
+}
+
+/* Given the "normal" TRAD_FNDECL marked with 'regparmandstackparm',
+ return a duplicate fndecl marked 'regparmandstackparmee' (note trailing
+ 'ee'). Enter them as a pair in the splay tree ST, if non-null;
+ looking up the TRAD_FNDECL will return the new one. */
+static tree
+ix86_darwin_regparmandstackparm_dup_fndecl (tree trad_fndecl, splay_tree st)
+{
+ tree fntype;
+ tree new_fndecl;
+
+ fntype = TREE_TYPE (trad_fndecl);
+
+ /* NEW_FNDECL will be compiled with the XMM-based calling
+ convention, and TRAD_FNDECL (the original) will be compiled with
+ the traditional stack-based calling convention. */
+ new_fndecl = copy_node (trad_fndecl);
+ DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0;
+ allocate_struct_function (new_fndecl);
+ DECL_STRUCT_FUNCTION (new_fndecl)->function_end_locus
+ = DECL_STRUCT_FUNCTION (trad_fndecl)->function_end_locus;
+ DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl =
+ DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl;
+ DECL_RESULT (new_fndecl) = copy_node (DECL_RESULT (trad_fndecl));
+ DECL_CONTEXT (DECL_RESULT (new_fndecl)) = new_fndecl;
+ SET_DECL_ASSEMBLER_NAME (new_fndecl, 0);
+ DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl));
+ TYPE_ATTRIBUTES (TREE_TYPE (new_fndecl))
+ = copy_list (TYPE_ATTRIBUTES (TREE_TYPE (trad_fndecl)));
+ ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl));
+ /* Kludge: block copied from tree-inline.c(save_body). Should
+ be refactored into a common shareable routine. */
+ {
+ tree *parg;
+
+ for (parg = &DECL_ARGUMENTS (new_fndecl);
+ *parg;
+ parg = &TREE_CHAIN (*parg))
+ {
+ tree new = copy_node (*parg);
+
+ lang_hooks.dup_lang_specific_decl (new);
+ DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (*parg);
+ DECL_CONTEXT (new) = new_fndecl;
+ /* Note: it may be possible to move the original parameters
+ with the function body, making this splay tree
+ unnecessary. */
+ if (st)
+ splay_tree_insert (st, (splay_tree_key) *parg, (splay_tree_value) new);
+ TREE_CHAIN (new) = TREE_CHAIN (*parg);
+ *parg = new;
+ }
+
+ if (DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl)
+ {
+ tree old = DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl;
+ tree new = copy_node (old);
+
+ lang_hooks.dup_lang_specific_decl (new);
+ DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (old);
+ DECL_CONTEXT (new) = new_fndecl;
+ if (st)
+ splay_tree_insert (st, (splay_tree_key) old, (splay_tree_value) new);
+ TREE_CHAIN (new) = TREE_CHAIN (old);
+ DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = new;
+ }
+
+ if (st)
+ splay_tree_insert (st, (splay_tree_key) DECL_RESULT (trad_fndecl),
+ (splay_tree_value) DECL_RESULT (new_fndecl));
+ }
+#if 0
+ /* Testing Kludge: If TREE_READONLY is set, cgen can and
+ occasionally will delete "pure" (no side-effect) calls to a
+ library function. Cleared here to preclude this when
+ test-building libraries. */
+ TREE_READONLY (new_fndecl) = false;
+#endif
+
+ return new_fndecl;
+}
+
+/* FNDECL has no body, but the user has marked it as a regparmandstackparm
+ item. Create a corresponding regparmandstackparm decl for it, and
+ arrange for calls to be redirected to the regparmandstackparm
+ version. */
+static tree
+ix86_darwin_regparmandstackparm_extern_decl (tree trad_fndecl)
+{
+ tree new_fndecl;
+
+ /* new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, (splay_tree)0); */
+ new_fndecl = copy_node (trad_fndecl);
+ DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl));
+ DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0;
+ SET_DECL_ASSEMBLER_NAME (new_fndecl, 0);
+ ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl));
+ cgraph_finalize_function (new_fndecl, /* nested = */ true);
+ if (!ix86_darwin_regparmandstackparm_st)
+ ix86_darwin_regparmandstackparm_st
+ = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ splay_tree_insert (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl);
+ return new_fndecl;
+}
+
+/* Invoked after all functions have been seen and digested, but before
+ any inlining decisions have been made. Walk the callgraph, seeking
+ calls to functions that have regparmandstackparm variants. Rewrite the
+ calls, directing them to the new 'regparmandstackparmee' versions. */
+void
+ix86_darwin_redirect_calls (void)
+{
+ struct cgraph_node *fastcall_node, *node;
+ struct cgraph_edge *edge, *next_edge;
+ tree addr, fastcall_decl, orig_fntype;
+ splay_tree_node call_stn, type_stn;
+
+ if (!flag_unit_at_a_time)
+ return;
+
+ if (!ix86_darwin_fntype_st)
+ ix86_darwin_fntype_st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+
+ if (!ix86_darwin_regparmandstackparm_st)
+ ix86_darwin_regparmandstackparm_st
+ = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+
+ /* Extern decls marked "regparmandstackparm" beget regparmandstackparmee
+ decls. */
+ for (node = cgraph_nodes; node; node = node->next)
+ if (!DECL_SAVED_TREE (node->decl)
+ && lookup_attribute ("regparmandstackparm",
+ TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
+ && !lookup_attribute ("regparmandstackparmee",
+ TYPE_ATTRIBUTES (TREE_TYPE (node->decl))))
+ {
+ fastcall_decl = ix86_darwin_regparmandstackparm_extern_decl (node->decl);
+ splay_tree_insert (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key) node->decl,
+ (splay_tree_value) fastcall_decl);
+ }
+
+ /* Walk the callgraph, rewriting calls as we go. */
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ call_stn = splay_tree_lookup (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key)node->decl);
+ /* If this function was in our splay-tree, we previously created
+ a regparmandstackparm version of it. */
+ if (call_stn)
+ {
+ fastcall_decl = (tree)call_stn->value;
+ fastcall_node = cgraph_node (fastcall_decl);
+ /* Redirect all calls to this fn to the regparmandstackparm
+ version. */
+ for (edge = next_edge = node->callers ; edge ; edge = next_edge)
+ {
+ tree call, stmt;
+ next_edge = next_edge->next_caller;
+ cgraph_redirect_edge_callee (edge, fastcall_node);
+ /* APPLE LOCAL */
+ /* MERGE FIXME call_expr -> call_stmt */
+ stmt = edge->call_stmt;
+ call = get_call_expr_in (stmt);
+ addr = TREE_OPERAND (call, 0);
+ TREE_OPERAND (addr, 0) = fastcall_decl;
+ orig_fntype = TREE_TYPE (addr);
+ /* Likewise, revise the TYPE of the ADDR node between
+ the CALL_EXPR and the FNDECL. This type determines
+ the parameters and calling convention applied to this
+ CALL_EXPR. */
+	      type_stn = splay_tree_lookup (ix86_darwin_fntype_st, (splay_tree_key)orig_fntype);
+ if (type_stn)
+ TREE_TYPE (addr) = (tree)type_stn->value;
+ else
+ {
+ ix86_make_regparmandstackparmee (&TREE_TYPE (addr));
+ splay_tree_insert (ix86_darwin_fntype_st,
+ (splay_tree_key)orig_fntype,
+ (splay_tree_value)TREE_TYPE (addr));
+ }
+ }
+ }
+ }
+}
+
+/* Information necessary to re-context a function body. */
+typedef struct {
+ tree old_context;
+ tree new_context;
+ splay_tree decl_map;
+} recontext_data;
+
+/* Visit every node of a function body; if it points at the
+ OLD_CONTEXT, re-direct it to the NEW_CONTEXT. Invoked via
+ walk_tree. DECL_MAP is a splay tree that maps the original
+ parameters to new ones. */
+static tree
+ix86_darwin_re_context_1 (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
+{
+ tree t;
+ recontext_data *rcd;
+ enum tree_code_class class;
+ splay_tree_node n;
+
+ if (!tp)
+ return NULL_TREE;
+
+ t = *tp;
+ if (!t)
+ return NULL_TREE;
+
+ rcd = (recontext_data *)data;
+ n = splay_tree_lookup (rcd->decl_map, (splay_tree_key) t);
+ if (n)
+ {
+ *tp = (tree)n->value;
+ return NULL_TREE;
+ }
+
+ class = TREE_CODE_CLASS (TREE_CODE (t));
+ if (class != tcc_declaration)
+ return NULL_TREE;
+
+ if (DECL_CONTEXT (t) == rcd->old_context)
+ DECL_CONTEXT (t) = rcd->new_context;
+
+ return NULL_TREE;
+}
+
+/* Walk a function body, updating every pointer to OLD_CONTEXT to
+ NEW_CONTEXT. TP is the top of the function body, and ST is a splay
+ tree of replacements for the parameters. */
+static tree
+ix86_darwin_re_context (tree *tp, tree old_context, tree new_context, splay_tree st)
+{
+ recontext_data rcd;
+ tree ret;
+
+ rcd.old_context = old_context;
+ rcd.new_context = new_context;
+ rcd.decl_map = st;
+
+ ret = walk_tree (tp, ix86_darwin_re_context_1,
+ (void *)&rcd, (struct pointer_set_t *)0);
+ return ret;
+}
+
+/* Given TRAD_FNDECL, create a regparmandstackparm variant and hang the
+ DECL_SAVED_TREE body there. Create a new, one-statement body for
+ TRAD_FNDECL that calls the new one. If the return types are
+ compatible (e.g. non-FP), the call can usually be sibcalled. The
+ inliner will often copy the body from NEW_FNDECL into TRAD_FNDECL,
+ and we do nothing to prevent this. */
+static void
+ix86_darwin_regparmandstackparm_wrapper (tree trad_fndecl)
+{
+ tree new_fndecl;
+ splay_tree st;
+ tree bind, block, call, clone_parm, modify, parmlist, rdecl, rtn, stmt_list, type;
+ tree_stmt_iterator tsi;
+
+ st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, st);
+
+ for (parmlist = NULL, clone_parm = DECL_ARGUMENTS (trad_fndecl);
+ clone_parm;
+ clone_parm = TREE_CHAIN (clone_parm))
+ {
+ gcc_assert (clone_parm);
+ DECL_ABSTRACT_ORIGIN (clone_parm) = NULL;
+ parmlist = tree_cons (NULL, clone_parm, parmlist);
+ }
+
+ /* We built this list backwards; fix now. */
+ parmlist = nreverse (parmlist);
+ type = TREE_TYPE (TREE_TYPE (trad_fndecl));
+ call = build_function_call (new_fndecl, parmlist);
+ TREE_TYPE (call) = type;
+ if (type == void_type_node)
+ rtn = call;
+ else if (0 && ix86_return_in_memory (type))
+ {
+ /* Return without a RESULT_DECL: RETURN_EXPR (CALL). */
+ rtn = make_node (RETURN_EXPR);
+ TREE_OPERAND (rtn, 0) = call;
+ TREE_TYPE (rtn) = type;
+ }
+ else /* RETURN_EXPR(MODIFY(RESULT_DECL, CALL)). */
+ {
+ rdecl = make_node (RESULT_DECL);
+ TREE_TYPE (rdecl) = type;
+ DECL_MODE (rdecl) = TYPE_MODE (type);
+ DECL_RESULT (trad_fndecl) = rdecl;
+ DECL_CONTEXT (rdecl) = trad_fndecl;
+ modify = build_modify_expr (rdecl, NOP_EXPR, call);
+ TREE_TYPE (modify) = type;
+ rtn = make_node (RETURN_EXPR);
+ TREE_OPERAND (rtn, 0) = modify;
+ TREE_TYPE (rtn) = type;
+ }
+ stmt_list = alloc_stmt_list ();
+ tsi = tsi_start (stmt_list);
+ tsi_link_after (&tsi, rtn, TSI_NEW_STMT);
+
+ /* This wrapper consists of "return <my_name>$3SSE (<my_arguments>);"
+ thus it has no local variables. */
+ block = make_node (BLOCK);
+ TREE_USED (block) = true;
+ bind = make_node (BIND_EXPR);
+ BIND_EXPR_BLOCK (bind) = block;
+ BIND_EXPR_BODY (bind) = stmt_list;
+ TREE_TYPE (bind) = void_type_node;
+ TREE_SIDE_EFFECTS (bind) = true;
+
+ DECL_SAVED_TREE (trad_fndecl) = bind;
+
+  /* DECL_ABSTRACT_ORIGIN (new_fndecl) = NULL; */ /* ? */
+
+ ix86_darwin_re_context (&new_fndecl, trad_fndecl, new_fndecl, st);
+ ix86_darwin_re_context (&DECL_SAVED_TREE (new_fndecl), trad_fndecl, new_fndecl, st);
+ splay_tree_delete (st);
+ gimplify_function_tree (new_fndecl);
+ cgraph_finalize_function (new_fndecl, /* nested = */ true);
+ gimplify_function_tree (trad_fndecl);
+ if (!ix86_darwin_regparmandstackparm_st)
+ ix86_darwin_regparmandstackparm_st
+ = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
+ splay_tree_insert (ix86_darwin_regparmandstackparm_st,
+ (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl);
+}
+
+/* Entry point into the regparmandstackparm stuff. FNDECL might be marked
+ 'regparmandstackparm'; if it is, create the fast version, &etc. */
+void
+ix86_darwin_handle_regparmandstackparm (tree fndecl)
+{
+ static unsigned int already_running = 0;
+
+  /* Guard against recursive invocation while the wrapper is being built.  */
+ if (!fndecl || already_running)
+ return;
+
+ already_running++;
+
+ if (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
+ && !lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ {
+ if (DECL_STRUCT_FUNCTION (fndecl) && DECL_STRUCT_FUNCTION (fndecl)->stdarg)
+ error ("regparmandstackparm is incompatible with varargs");
+ else if (DECL_SAVED_TREE (fndecl))
+ ix86_darwin_regparmandstackparm_wrapper (fndecl);
+ }
+
+ already_running--;
+}
+/* APPLE LOCAL end regparmandstackparm */
+
+/* APPLE LOCAL begin CW asm blocks */
+#include <ctype.h>
+#include "config/asm.h"
+
+/* Additional register names accepted for inline assembly that would
+   otherwise not be recognized as registers.  This table must be sorted
+   for bsearch.  */
+static const char *iasm_additional_names[] = {
+ "AH", "AL", "AX", "BH", "BL", "BP", "BX", "CH", "CL", "CX", "DH",
+ "DI", "DL", "DX", "EAX", "EBP", "EBX", "ECX", "EDI", "EDX", "ESI",
+ "ESP", "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "R10",
+ "R11", "R12", "R13", "R14", "R15", "R8", "R9", "RAX", "RBP", "RBX",
+ "RCX", "RDI", "RDX", "RSI", "RSP", "SI", "SP", "ST", "ST(1)", "ST(2)",
+ "ST(3)", "ST(4)", "ST(5)", "ST(6)", "ST(7)", "XMM0", "XMM1", "XMM10",
+ "XMM11", "XMM12", "XMM13", "XMM14", "XMM15", "XMM2", "XMM3", "XMM4",
+ "XMM5", "XMM6", "XMM7", "XMM8", "XMM9" };
+
+/* Comparison function for bsearch to find additional register names. */
+static int
+iasm_reg_comp (const void *a, const void *b)
+{
+ char *const*x = a;
+ char *const*y = b;
+ int c = strcasecmp (*x, *y);
+ return c;
+}
+
+/* Translate some register names seen in CW asm into GCC standard
+ forms. */
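+/* For example, the CW asm spelling "EAX" is not recognized by
+   decode_reg_name, so it is found in the sorted table above and
+   rewritten as "eax", with a "%" prefix prepended when the output
+   dialect is AT&T.  */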
+
+const char *
+i386_iasm_register_name (const char *regname, char *buf)
+{
+ const char **r;
+
+ /* If we can find the named register, return it. */
+ if (decode_reg_name (regname) >= 0)
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return regname;
+ sprintf (buf, "%%%s", regname);
+ return buf;
+ }
+
+  /* Otherwise, see if the name appears in iasm_additional_names; if
+     so, return a lower-case version of it.  */
+ r = bsearch (&regname, iasm_additional_names,
+ sizeof (iasm_additional_names) / sizeof (iasm_additional_names[0]),
+ sizeof (iasm_additional_names[0]), iasm_reg_comp);
+ if (r)
+ {
+ char *p;
+ const char *q;
+ q = regname = *r;
+ p = buf;
+ if (ASSEMBLER_DIALECT != ASM_INTEL)
+ *p++ = '%';
+ regname = p;
+ while ((*p++ = tolower (*q++)))
+ ;
+ if (decode_reg_name (regname) >= 0)
+ return buf;
+ }
+
+ return NULL;
+}
+
+/* Return true iff the opcode wants memory to be stable. We arrange
+ for a memory clobber in these instances. */
+bool
+iasm_memory_clobber (const char *ARG_UNUSED (opcode))
+{
+ return true;
+}
+
+/* Return true iff the operands need swapping. */
+
+bool
+iasm_x86_needs_swapping (const char *opcode)
+{
+  /* Don't swap if the output format is the same as the input format.  */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return false;
+
+ /* These don't need swapping. */
+ if (strcasecmp (opcode, "bound") == 0)
+ return false;
+ if (strcasecmp (opcode, "invlpga") == 0)
+ return false;
+ if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1))
+ return false;
+
+ return true;
+}
+
+/* Swap operands given in MS-style asm ordering, when the output style
+   is AT&T syntax.  */
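+/* E.g. an Intel-dialect operand list such as (eax, 1) for "mov eax, 1"
+   is reversed below so that the AT&T-style printer sees the source
+   first and the destination last.  */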
+
+static tree
+iasm_x86_swap_operands (const char *opcode, tree args)
+{
+ int noperands;
+
+ if (iasm_x86_needs_swapping (opcode) == false)
+ return args;
+
+#if 0
+ /* GAS also checks the type of the arguments to determine if they
+ need swapping. */
+ if ((argtype[0]&Imm) && (argtype[1]&Imm))
+ return args;
+#endif
+ noperands = list_length (args);
+ if (noperands == 2 || noperands == 3)
+ {
+ /* Swap first and last (1 and 2 or 1 and 3). */
+ return nreverse (args);
+ }
+ return args;
+}
+
+/* Map a register name to a high level tree type for a VAR_DECL of
+ that type, whose RTL will refer to the given register. */
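+/* The mapping below keys off the spelling of the register name:
+   "%e.." yields SImode, "%.l" QImode, "%.x" HImode, "%r.." DImode,
+   and "%x../%m.." (XMM/MM registers) are approximated as SFmode.  */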
+
+static tree
+iasm_type_for (tree arg)
+{
+ tree type = NULL_TREE;
+
+ if (IDENTIFIER_LENGTH (arg) > 2
+ && IDENTIFIER_POINTER (arg)[0] == '%')
+ {
+ enum machine_mode mode = VOIDmode;
+ if (IDENTIFIER_POINTER (arg)[1] == 'e')
+ mode = SImode;
+ else if (/* IDENTIFIER_POINTER (arg)[2] == 'h'
+ || */ IDENTIFIER_POINTER (arg)[2] == 'l')
+ mode = QImode;
+ else if (IDENTIFIER_POINTER (arg)[2] == 'x')
+ mode = HImode;
+ else if (IDENTIFIER_POINTER (arg)[1] == 'r')
+ mode = DImode;
+ else if (IDENTIFIER_POINTER (arg)[1] == 'x')
+ mode = SFmode;
+ else if (IDENTIFIER_POINTER (arg)[1] == 'm')
+ mode = SFmode;
+
+ if (mode != VOIDmode)
+ type = lang_hooks.types.type_for_mode (mode, 1);
+ }
+
+ return type;
+}
+
+/* We raise the code from a named register into a VAR_DECL of an
+ appropriate type that refers to the register so that reload doesn't
+ run out of registers. */
+
+tree
+iasm_raise_reg (tree arg)
+{
+ int regno = decode_reg_name (IDENTIFIER_POINTER (arg));
+ if (regno >= 0)
+ {
+ tree decl = NULL_TREE;
+
+ decl = lookup_name (arg);
+ if (decl == error_mark_node)
+ decl = 0;
+ if (decl == 0)
+ {
+ tree type = iasm_type_for (arg);
+ if (type)
+ {
+ decl = build_decl (VAR_DECL, arg, type);
+ DECL_ARTIFICIAL (decl) = 1;
+ DECL_REGISTER (decl) = 1;
+ C_DECL_REGISTER (decl) = 1;
+ DECL_HARD_REGISTER (decl) = 1;
+ set_user_assembler_name (decl, IDENTIFIER_POINTER (arg));
+ decl = lang_hooks.decls.pushdecl (decl);
+ }
+ }
+
+ if (decl)
+ return decl;
+ }
+
+ return arg;
+}
+
+/* Allow constants and readonly variables to be used in instructions
+ in places that require constants. */
+
+static tree
+iasm_default_conv (tree e)
+{
+ if (e == NULL_TREE)
+ return e;
+
+ if (TREE_CODE (e) == CONST_DECL)
+ e = DECL_INITIAL (e);
+
+ if (DECL_P (e) && DECL_MODE (e) != BLKmode)
+ e = decl_constant_value (e);
+ return e;
+}
+
+/* Return true iff the operand is suitable as the offset for a
+   memory instruction.  */
+
+static bool
+iasm_is_offset (tree v)
+{
+ if (TREE_CODE (v) == INTEGER_CST)
+ return true;
+ if (TREE_CODE (v) == ADDR_EXPR)
+ {
+ v = TREE_OPERAND (v, 0);
+ if (TREE_CODE (v) == VAR_DECL
+ && TREE_STATIC (v)
+ && MEM_P (DECL_RTL (v)))
+ {
+ note_alternative_entry_points ();
+ return true;
+ }
+ if (TREE_CODE (v) == LABEL_DECL)
+ return true;
+ return false;
+ }
+ if (TREE_CODE (v) == VAR_DECL
+ && TREE_STATIC (v)
+ && MEM_P (DECL_RTL (v)))
+ {
+ note_alternative_entry_points ();
+ return true;
+ }
+ if ((TREE_CODE (v) == MINUS_EXPR
+ || TREE_CODE (v) == PLUS_EXPR)
+ && iasm_is_offset (TREE_OPERAND (v, 0))
+ && iasm_is_offset (TREE_OPERAND (v, 1)))
+ return true;
+ if (TREE_CODE (v) == NEGATE_EXPR
+ && iasm_is_offset (TREE_OPERAND (v, 0)))
+ return true;
+
+ return false;
+}
+
+/* Combine two types for [] expressions. */
+
+static tree
+iasm_combine_type (tree type0, tree type1)
+{
+ if (type0 == void_type_node
+ || type0 == NULL_TREE)
+ {
+ if (type1 == void_type_node)
+ return NULL_TREE;
+ return type1;
+ }
+
+ if (type1 == void_type_node
+ || type1 == NULL_TREE)
+ return type0;
+
+ if (type0 == type1)
+ return type0;
+
+ error ("too many types in []");
+
+ return type0;
+}
+
+/* We canonicalize the input form of bracket expressions, as the input
+   forms are less constrained than what the assembler will accept.
+
+   TOP is the top of the canonical tree we're generating and
+   TREE_OPERAND (TOP, 0) is the offset portion of the expression.  ARGP
+   points to the current part of the tree we're walking.
+
+   The transformations we do:
+
+   (A+O) ==> A
+   (A-O) ==> A
+   (O+A) ==> A
+
+   where the O are offset expressions.  */
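+
+/* For example, an input like "[foo + 4 + eax]" has its offset parts
+   (foo and 4) folded into TREE_OPERAND (TOP, 0), leaving only the
+   register part behind, which matches the "offset(base,index)" shape
+   the assembler ultimately wants.  */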
+
+static tree
+iasm_canonicalize_bracket_1 (tree* argp, tree top)
+{
+ tree arg = *argp;
+ tree offset = TREE_OPERAND (top, 0);
+ tree arg0, arg1;
+ tree rtype = NULL_TREE;
+
+ *argp = arg = iasm_default_conv (arg);
+
+ switch (TREE_CODE (arg))
+ {
+ case NOP_EXPR:
+ if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE)
+ {
+ *argp = TREE_OPERAND (arg, 0);
+ return TREE_TYPE (arg);
+ }
+ break;
+
+ case BRACKET_EXPR:
+ rtype = TREE_TYPE (arg);
+ /* fall thru */
+ case PLUS_EXPR:
+ arg0 = TREE_OPERAND (arg, 0);
+ arg1 = TREE_OPERAND (arg, 1);
+
+ arg0 = iasm_default_conv (arg0);
+ arg1 = iasm_default_conv (arg1);
+
+ if (iasm_is_offset (arg0))
+ {
+ if (offset != integer_zero_node)
+ arg0 = build2 (PLUS_EXPR, void_type_node, arg0, offset);
+ TREE_OPERAND (top, 0) = arg0;
+
+ *argp = arg1;
+ if (arg1)
+ return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
+ }
+ else if (arg1 && iasm_is_offset (arg1))
+ {
+ if (offset != integer_zero_node)
+ arg1 = build2 (PLUS_EXPR, void_type_node, arg1, offset);
+ TREE_OPERAND (top, 0) = arg1;
+ *argp = arg0;
+ return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
+ }
+ else
+ {
+ rtype = iasm_combine_type (rtype,
+ iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top));
+
+ if (arg1)
+ rtype = iasm_combine_type (rtype,
+ iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), top));
+ if (TREE_OPERAND (arg, 0) == NULL_TREE)
+ {
+ if (TREE_OPERAND (arg, 1))
+ {
+ TREE_OPERAND (arg, 0) = TREE_OPERAND (arg, 1);
+ TREE_OPERAND (arg, 1) = NULL_TREE;
+ }
+ else
+ *argp = NULL_TREE;
+ }
+ else if (TREE_OPERAND (arg, 1) == NULL_TREE && rtype == NULL_TREE)
+ *argp = TREE_OPERAND (arg, 0);
+ if (TREE_CODE (arg) == PLUS_EXPR
+ && TREE_TYPE (arg) == NULL_TREE
+ && TREE_TYPE (TREE_OPERAND (arg, 0))
+ && TREE_TYPE (TREE_OPERAND (arg, 1))
+ && (POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1)))
+ || POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))))
+ {
+ tree type = TREE_TYPE (TREE_OPERAND (arg, 1));
+ if (INTEGRAL_TYPE_P (type))
+ type = TREE_TYPE (TREE_OPERAND (arg, 0));
+ TREE_TYPE (arg) = type;
+ }
+ if (TREE_CODE (arg) == PLUS_EXPR
+ && TREE_TYPE (arg) == NULL_TREE
+ && TREE_TYPE (TREE_OPERAND (arg, 0))
+ && TREE_TYPE (TREE_OPERAND (arg, 0)) == TREE_TYPE (TREE_OPERAND (arg, 1)))
+ {
+ tree type = TREE_TYPE (TREE_OPERAND (arg, 0));
+ TREE_TYPE (arg) = type;
+ }
+ }
+ return rtype;
+
+ case MINUS_EXPR:
+ rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top);
+ arg0 = TREE_OPERAND (arg, 0);
+ arg1 = TREE_OPERAND (arg, 1);
+ arg1 = iasm_default_conv (arg1);
+ if (iasm_is_offset (arg1))
+ {
+ offset = TREE_OPERAND (top, 0);
+ if (offset == integer_zero_node)
+ arg1 = fold (build1 (NEGATE_EXPR,
+ TREE_TYPE (arg1),
+ arg1));
+ else
+ arg1 = build2 (MINUS_EXPR, void_type_node, offset, arg1);
+ TREE_OPERAND (top, 0) = arg1;
+ *argp = arg0;
+	  return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
+ }
+ return rtype;
+
+ case PARM_DECL:
+ case VAR_DECL:
+ {
+ *argp = iasm_addr (arg);
+ break;
+ }
+
+ case IDENTIFIER_NODE:
+ {
+ *argp = iasm_raise_reg (arg);
+ break;
+ }
+
+ case MULT_EXPR:
+ if (TREE_TYPE (arg) == NULL_TREE)
+ {
+ if (TREE_CODE (TREE_OPERAND (arg, 1)) == IDENTIFIER_NODE)
+ TREE_OPERAND (arg, 1) = iasm_raise_reg (TREE_OPERAND (arg, 1));
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE)
+ TREE_OPERAND (arg, 0) = iasm_raise_reg (TREE_OPERAND (arg, 0));
+ if (TREE_TYPE (TREE_OPERAND (arg, 0))
+ && TREE_TYPE (TREE_OPERAND (arg, 1)))
+ TREE_TYPE (arg) = TREE_TYPE (TREE_OPERAND (arg, 0));
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+/* Form an indirection for an inline asm address expression operand.
+ We give a warning when we think the optimizer might have to be used
+ to reform complex addresses, &stack_var + %eax + 4 for example,
+ after gimplification rips the address apart. */
+
+static tree
+iasm_indirect (tree addr)
+{
+ if (TREE_CODE (addr) == ADDR_EXPR
+ && TREE_CODE (TREE_TYPE (TREE_OPERAND (addr, 0))) != ARRAY_TYPE
+ /* && TREE_CODE (TREE_OPERAND (addr, 0)) == ARRAY_REF */)
+ return TREE_OPERAND (addr, 0);
+
+ addr = fold (build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr));
+
+ if (! optimize && TREE_CODE (addr) == INDIRECT_REF)
+ warning (0, "addressing mode too complex when not optimizing, will consume extra register(s)");
+
+ return addr;
+}
+
+/* Form an address addition for an inline asm address expression.  We
+   try to form ARRAY_REFs, as they will go through gimplification
+   without being ripped apart.  */
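+/* A sketch of the intended effect (illustrative, not from the
+   original comments): iasm_add (addr, off) hands the addition to
+   pointer_int_sum, so the result is ordinary C pointer arithmetic
+   that gimplification can keep as a single address, rather than a
+   bare PLUS_EXPR that would be ripped apart.  */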
+
+static tree
+iasm_add (tree addr, tree off)
+{
+ if (integer_zerop (off))
+ return addr;
+
+ /* We have to convert the offset to an int type, as we rip apart
+ trees whose type has been converted to a pointer type for the
+ offset already. */
+ return pointer_int_sum (PLUS_EXPR, addr, convert (integer_type_node, off));
+}
+
+/* We canonicalize the input form of bracket expressions, as the input
+   forms are less constrained than what the assembler will accept.  */
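+/* An illustrative sketch of the canonical form (an assumption drawn
+   from the code below, not from the original comments): an operand
+   such as [4 + ebp] arrives as a BRACKET_EXPR whose operands may be
+   in any order; canonicalization moves the constant displacement into
+   operand 0 and the register part into operand 1, which is the shape
+   the operand printer expects.  */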
+
+static tree
+iasm_canonicalize_bracket (tree arg)
+{
+ tree rtype;
+
+ gcc_assert (TREE_CODE (arg) == BRACKET_EXPR);
+
+ /* Let the normal operand printer output this without trying to
+ decompose it into parts so that things like (%esp + 20) + 4 can
+ be output as 24(%esp) by the optimizer instead of 4(%0) and
+ burning an "R" with (%esp + 20). */
+ if (TREE_OPERAND (arg, 1) == NULL_TREE
+ && TREE_TYPE (TREE_OPERAND (arg, 0))
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0))))
+ {
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL
+ || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL)
+ return arg;
+ return iasm_indirect (TREE_OPERAND (arg, 0));
+ }
+
+  /* Ensure that operand 0 is an offset.  */
+ if (TREE_OPERAND (arg, 0)
+ && iasm_is_offset (TREE_OPERAND (arg, 0)))
+ {
+      /* We win if operand 0 is an offset already.  */
+ }
+ else if (TREE_OPERAND (arg, 1) == NULL_TREE)
+ {
+      /* Move operand 0 to operand 1 if operand 1 is empty and 0 isn't
+	 already an offset.  */
+ TREE_OPERAND (arg, 1) = TREE_OPERAND (arg, 0);
+ TREE_OPERAND (arg, 0) = integer_zero_node;
+ }
+ else
+ {
+ tree swp;
+      /* Just have to force it now.  */
+ swp = iasm_build_bracket (TREE_OPERAND (arg, 0), TREE_OPERAND (arg, 1));
+ TREE_OPERAND (arg, 0) = integer_zero_node;
+ TREE_OPERAND (arg, 1) = swp;
+ }
+
+ if (TREE_OPERAND (arg, 1))
+ {
+ rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), arg);
+ if (rtype)
+ TREE_TYPE (arg) = iasm_combine_type (TREE_TYPE (arg), rtype);
+ }
+
+  /* For correctness, pointer types should be raised to the tree
+     level, as they denote address calculations with stack-based
+     objects, and we want print_operand to print the entire address so
+     that it can combine constants and hard registers into the address.
+     Unfortunately we might have to rely upon the optimizer to reform
+     the address after the gimplification pass rips it apart.  */
+
+ /* Handle [INTEGER_CST][ptr][op3] */
+ if (TREE_OPERAND (arg, 1)
+ && TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST
+ && TREE_CODE (TREE_OPERAND (arg, 1)) == BRACKET_EXPR
+ && TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)))
+ && TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) != void_type_node
+ && (TREE_TYPE (arg) == void_type_node
+ || (TREE_TYPE (arg) == get_identifier ("word")
+ && (TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))))
+ == HImode))))
+ {
+ tree op3 = TREE_OPERAND (TREE_OPERAND (arg, 1), 1);
+ tree addr = iasm_add (TREE_OPERAND (TREE_OPERAND (arg, 1), 0),
+ TREE_OPERAND (arg, 0));
+ tree type;
+ addr = iasm_indirect (addr);
+ if (op3 == NULL_TREE)
+ return addr;
+ type = TREE_TYPE (addr);
+ type = build_pointer_type (type);
+ addr = build1 (ADDR_EXPR, type, addr);
+ addr = fold (build2 (PLUS_EXPR, type, addr, op3));
+ return iasm_indirect (addr);
+ }
+
+ /* Handle ptr + INTEGER_CST */
+ if (TREE_OPERAND (arg, 1)
+ && TREE_TYPE (arg) == void_type_node
+ && TREE_TYPE (TREE_OPERAND (arg, 1))
+ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1)))
+ && TREE_TYPE (TREE_TYPE (TREE_OPERAND (arg, 1))) != void_type_node)
+ {
+ if (TREE_CODE (TREE_OPERAND (arg, 1)) == ADDR_EXPR)
+ {
+ if (TREE_OPERAND (arg, 0) == integer_zero_node)
+ return TREE_OPERAND (TREE_OPERAND (arg, 1), 0);
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST)
+ return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0)));
+ }
+ if (TREE_CODE (TREE_OPERAND (arg, 1)) == PLUS_EXPR)
+ {
+ if (TREE_OPERAND (arg, 0) == integer_zero_node)
+ return iasm_indirect (TREE_OPERAND (arg, 1));
+ if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST)
+ return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0)));
+ }
+ }
+ return arg;
+}
+
+/* We canonicalize the instruction by swapping operands and rewriting
+   the opcode if the output style is in AT&T syntax.  */
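+/* For example (an illustrative sketch): the Intel-syntax
+   "mov eax, ebx" is emitted for AT&T output as "movl %ebx, %eax";
+   the operands are swapped and a size suffix derived from the
+   operand modes is appended to the opcode.  */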
+
+tree
+iasm_x86_canonicalize_operands (const char **opcode_p, tree iargs, void *ep)
+{
+ iasm_md_extra_info *e = ep;
+ static char buf[40];
+ tree args = iargs;
+ int argnum = 1;
+ const char *opcode = *opcode_p;
+ bool fp_style = false;
+ bool fpi_style = false;
+
+ /* Don't transform if output format is the same as input format. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return iargs;
+
+ if (strncasecmp (opcode, "f", 1) == 0)
+ fp_style = true;
+
+ if (fp_style
+ && strncasecmp (opcode+1, "i", 1) == 0)
+ fpi_style = true;
+
+ while (args)
+ {
+ tree arg = TREE_VALUE (args);
+
+ /* Handle st(3) */
+ if (TREE_CODE (arg) == COMPOUND_EXPR
+ && TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE
+ && strcasecmp (IDENTIFIER_POINTER (TREE_OPERAND (arg, 0)), "%st") == 0
+ && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST)
+ {
+ int v = tree_low_cst (TREE_OPERAND (arg, 1), 0);
+
+ if (v < 0 || v > 7)
+ {
+ error ("unknown floating point register st(%d)", v);
+ v = 0;
+ }
+
+ /* Rewrite %st(0) to %st. */
+ if (v == 0)
+ TREE_VALUE (args) = TREE_OPERAND (arg, 0);
+ else
+ {
+ char buf[20];
+ sprintf (buf, "%%st(%d)", v);
+ TREE_VALUE (args) = get_identifier (buf);
+ }
+ }
+ else if (TREE_CODE (arg) == BRACKET_EXPR)
+ TREE_VALUE (args) = arg = iasm_canonicalize_bracket (arg);
+
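+      /* Record an AT&T size suffix for this operand based on its mode
+	 or its "ptr" type, e.g. QImode -> 'b', HImode -> 'w',
+	 SImode -> 'l' (a summary of the cases below).  */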
+ switch (TREE_CODE (arg))
+ {
+ case ARRAY_REF:
+ case VAR_DECL:
+ case PARM_DECL:
+ case INDIRECT_REF:
+ if (TYPE_MODE (TREE_TYPE (arg)) == QImode)
+ e->mod[argnum-1] = 'b';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == HImode)
+ e->mod[argnum-1] = fpi_style ? 's' : 'w';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == SImode)
+ e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l');
+ else if (TYPE_MODE (TREE_TYPE (arg)) == DImode)
+ e->mod[argnum-1] = 'q';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == SFmode)
+ e->mod[argnum-1] = 's';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == DFmode)
+ e->mod[argnum-1] = 'l';
+ else if (TYPE_MODE (TREE_TYPE (arg)) == XFmode)
+ e->mod[argnum-1] = 't';
+ break;
+ case BRACKET_EXPR:
+	  /* We use the TREE_TYPE to indicate the type of the operand;
+	     it is set with code like: inc dword ptr [eax].  */
+ if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE)
+ {
+ const char *s = IDENTIFIER_POINTER (TREE_TYPE (arg));
+ if (strcasecmp (s, "byte") == 0)
+ e->mod[argnum-1] = 'b';
+ else if (strcasecmp (s, "word") == 0)
+ e->mod[argnum-1] = fpi_style ? 's' : 'w';
+ else if (strcasecmp (s, "dword") == 0)
+ e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l');
+ else if (strcasecmp (s, "qword") == 0)
+ e->mod[argnum-1] = 'q';
+ else if (strcasecmp (s, "real4") == 0)
+ e->mod[argnum-1] = 's';
+ else if (strcasecmp (s, "real8") == 0)
+ e->mod[argnum-1] = 'l';
+ else if (strcasecmp (s, "real10") == 0)
+ e->mod[argnum-1] = 't';
+ else if (strcasecmp (s, "tbyte") == 0)
+ e->mod[argnum-1] = 't';
+ }
+ break;
+ case LABEL_DECL:
+ e->mod[argnum-1] = 'l';
+ break;
+ case IDENTIFIER_NODE:
+ if (IDENTIFIER_LENGTH (arg) > 2
+ && IDENTIFIER_POINTER (arg)[0] == '%')
+ {
+ if (IDENTIFIER_POINTER (arg)[1] == 'e')
+ e->mod[argnum-1] = 'l';
+ else if (IDENTIFIER_POINTER (arg)[2] == 'h'
+ || IDENTIFIER_POINTER (arg)[2] == 'l')
+ e->mod[argnum-1] = 'b';
+ else if (IDENTIFIER_POINTER (arg)[2] == 'x')
+ e->mod[argnum-1] = 'w';
+ }
+ break;
+ default:
+ break;
+ }
+ args = TREE_CHAIN (args);
+ ++argnum;
+ }
+ --argnum;
+
+ args = iasm_x86_swap_operands (opcode, iargs);
+ if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1))
+ e->pseudo = true;
+
+ if (strcasecmp (opcode, "movs") == 0
+ || strcasecmp (opcode, "scas") == 0
+ || strcasecmp (opcode, "stos") == 0
+ || strcasecmp (opcode, "xlat") == 0)
+ args = NULL_TREE;
+ else if (strcasecmp (opcode, "cmovpo") == 0)
+ opcode = "cmovnp";
+ else if (strcasecmp (opcode, "cmovpe") == 0)
+ opcode = "cmovp";
+ else if (strcasecmp (opcode, "outs") == 0
+ && TREE_CHAIN (args))
+ {
+ e->mod[0] = e->mod[1];
+ }
+ else if (strcasecmp (opcode, "ins") == 0
+ && TREE_CHAIN (args))
+ {
+ e->mod[1] = 0;
+ }
+ /* movsx isn't part of the AT&T syntax, they spell it movs. */
+ else if (strcasecmp (opcode, "movsx") == 0)
+ opcode = "movs";
+ else if (strcasecmp (opcode, "pushfd") == 0)
+ *opcode_p = "pushf";
+ else if (strcasecmp (opcode, "popfd") == 0)
+ *opcode_p = "popf";
+
+ /* movzx isn't part of the AT&T syntax, they spell it movz. */
+ if (strcasecmp (opcode, "movzx") == 0)
+ {
+      /* Silly extension of the day: a zero-extended move that has the
+	 same before and after size is accepted and is just a normal
+	 move.  */
+ if (argnum == 2
+ && (e->mod[0] == e->mod[1]
+ || e->mod[1] == 0))
+ opcode = "mov";
+ else
+ opcode = "movz";
+ }
+
+  if (strncasecmp (opcode, "f", 1) == 0
+      && strcasecmp (opcode, "fldcw") != 0)
+ {
+ if (e->mod[0] == 'w')
+ e->mod[0] = 's';
+ if (e->mod[1] == 'w')
+ e->mod[1] = 's';
+ }
+ else if (strcasecmp (opcode, "mov") == 0)
+ {
+ /* The 32-bit integer instructions can be used on floats. */
+ if (e->mod[0] == 's')
+ e->mod[0] = 'l';
+ if (e->mod[1] == 's')
+ e->mod[1] = 'l';
+ }
+
+ if (e->pseudo)
+ e->mod[0] = e->mod[1] = 0;
+ else if (strcasecmp (opcode, "clflush") == 0
+ || strcasecmp (opcode, "fbld") == 0
+ || strcasecmp (opcode, "fbstp") == 0
+ || strcasecmp (opcode, "fldt") == 0
+ || strcasecmp (opcode, "fnstcw") == 0
+ || strcasecmp (opcode, "fnstsw") == 0
+ || strcasecmp (opcode, "fstcw") == 0
+ || strcasecmp (opcode, "fstsw") == 0
+ || strcasecmp (opcode, "fxrstor") == 0
+ || strcasecmp (opcode, "fxsave") == 0
+ || strcasecmp (opcode, "invlpg") == 0
+ || strcasecmp (opcode, "jmp") == 0
+ || strcasecmp (opcode, "call") == 0
+ || strcasecmp (opcode, "ja") == 0
+ || strcasecmp (opcode, "jae") == 0
+ || strcasecmp (opcode, "jb") == 0
+ || strcasecmp (opcode, "jbe") == 0
+ || strcasecmp (opcode, "jc") == 0
+ || strcasecmp (opcode, "je") == 0
+ || strcasecmp (opcode, "jg") == 0
+ || strcasecmp (opcode, "jge") == 0
+ || strcasecmp (opcode, "jl") == 0
+ || strcasecmp (opcode, "jle") == 0
+ || strcasecmp (opcode, "jna") == 0
+ || strcasecmp (opcode, "jnae") == 0
+ || strcasecmp (opcode, "jnb") == 0
+ || strcasecmp (opcode, "jnc") == 0
+ || strcasecmp (opcode, "jne") == 0
+ || strcasecmp (opcode, "jng") == 0
+ || strcasecmp (opcode, "jnge") == 0
+ || strcasecmp (opcode, "jnl") == 0
+ || strcasecmp (opcode, "jnle") == 0
+ || strcasecmp (opcode, "jno") == 0
+ || strcasecmp (opcode, "jnp") == 0
+ || strcasecmp (opcode, "jns") == 0
+ || strcasecmp (opcode, "jnz") == 0
+ || strcasecmp (opcode, "jo") == 0
+ || strcasecmp (opcode, "jp") == 0
+ || strcasecmp (opcode, "jpe") == 0
+ || strcasecmp (opcode, "jpo") == 0
+ || strcasecmp (opcode, "js") == 0
+ || strcasecmp (opcode, "jz") == 0
+ || strcasecmp (opcode, "ldmxcsr") == 0
+ || strcasecmp (opcode, "lgdt") == 0
+ || strcasecmp (opcode, "lidt") == 0
+ || strcasecmp (opcode, "lldt") == 0
+ || strcasecmp (opcode, "lmsw") == 0
+ || strcasecmp (opcode, "ltr") == 0
+ || strcasecmp (opcode, "movapd") == 0
+ || strcasecmp (opcode, "movaps") == 0
+ || strcasecmp (opcode, "movd") == 0
+ || strcasecmp (opcode, "movhpd") == 0
+ || strcasecmp (opcode, "movhps") == 0
+ || strcasecmp (opcode, "movlpd") == 0
+ || strcasecmp (opcode, "movlps") == 0
+ || strcasecmp (opcode, "movntdq") == 0
+ || strcasecmp (opcode, "movntpd") == 0
+ || strcasecmp (opcode, "movntps") == 0
+ || strcasecmp (opcode, "movntq") == 0
+ || strcasecmp (opcode, "movq") == 0
+ || strcasecmp (opcode, "movsd") == 0
+ || strcasecmp (opcode, "movss") == 0
+ || strcasecmp (opcode, "movupd") == 0
+ || strcasecmp (opcode, "movups") == 0
+ || strcasecmp (opcode, "out") == 0
+ || strcasecmp (opcode, "prefetchnta") == 0
+ || strcasecmp (opcode, "prefetcht0") == 0
+ || strcasecmp (opcode, "prefetcht1") == 0
+ || strcasecmp (opcode, "prefetcht2") == 0
+ || strcasecmp (opcode, "seta") == 0
+ || strcasecmp (opcode, "setae") == 0
+ || strcasecmp (opcode, "setb") == 0
+ || strcasecmp (opcode, "setbe") == 0
+ || strcasecmp (opcode, "setc") == 0
+ || strcasecmp (opcode, "sete") == 0
+ || strcasecmp (opcode, "setg") == 0
+ || strcasecmp (opcode, "setge") == 0
+ || strcasecmp (opcode, "setl") == 0
+ || strcasecmp (opcode, "setle") == 0
+ || strcasecmp (opcode, "setna") == 0
+ || strcasecmp (opcode, "setnae") == 0
+ || strcasecmp (opcode, "setnb") == 0
+ || strcasecmp (opcode, "setnbe") == 0
+ || strcasecmp (opcode, "setnc") == 0
+ || strcasecmp (opcode, "setne") == 0
+ || strcasecmp (opcode, "setng") == 0
+ || strcasecmp (opcode, "setnge") == 0
+ || strcasecmp (opcode, "setnl") == 0
+ || strcasecmp (opcode, "setnle") == 0
+ || strcasecmp (opcode, "setno") == 0
+ || strcasecmp (opcode, "setnp") == 0
+ || strcasecmp (opcode, "setns") == 0
+ || strcasecmp (opcode, "setnz") == 0
+ || strcasecmp (opcode, "seto") == 0
+ || strcasecmp (opcode, "setp") == 0
+ || strcasecmp (opcode, "setpe") == 0
+ || strcasecmp (opcode, "setpo") == 0
+ || strcasecmp (opcode, "sets") == 0
+ || strcasecmp (opcode, "setz") == 0
+ || strcasecmp (opcode, "sldt") == 0
+ || strcasecmp (opcode, "smsw") == 0
+ || strcasecmp (opcode, "stmxcsr") == 0
+ || strcasecmp (opcode, "str") == 0
+ || strcasecmp (opcode, "xlat") == 0)
+ e->mod[0] = 0;
+ else if (strcasecmp (opcode, "lea") == 0
+ || strcasecmp (opcode, "rcl") == 0
+ || strcasecmp (opcode, "rcr") == 0
+ || strcasecmp (opcode, "rol") == 0
+ || strcasecmp (opcode, "ror") == 0
+ || strcasecmp (opcode, "sal") == 0
+ || strcasecmp (opcode, "sar") == 0
+ || strcasecmp (opcode, "shl") == 0
+ || strcasecmp (opcode, "shr") == 0)
+ e->mod[1] = 0;
+
+ if ((argnum == 1 && e->mod[0])
+ || (argnum == 2 && e->mod[0]
+ && (e->mod[0] == e->mod[1]
+ || e->mod[1] == 0)))
+ {
+ sprintf (buf, "%s%c", opcode, e->mod[0]);
+ *opcode_p = buf;
+ }
+ else if (argnum == 2 && e->mod[0] && e->mod[1])
+ {
+ sprintf (buf, "%s%c%c", opcode, e->mod[1], e->mod[0]);
+ *opcode_p = buf;
+ }
+
+ return args;
+}
+
+/* Character used to separate the prefix words.  */
+/* See radr://4141844 for the enhancement to make this uniformly ' '. */
+#define IASM_PREFIX_SEP '/'
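+/* With this separator, a prefixed instruction such as "lock add"
+   comes out as "lock/add ..." (an illustrative example; the exact
+   text depends on the rest of the opcode printing).  */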
+
+void
+iasm_x86_print_prefix (char *buf, tree prefix_list)
+{
+ buf += strlen (buf);
+ while (prefix_list)
+ {
+ tree prefix = TREE_VALUE (prefix_list);
+ size_t len = IDENTIFIER_LENGTH (prefix);
+ memcpy (buf, IDENTIFIER_POINTER (prefix), len);
+ buf += len;
+ buf[0] = IASM_PREFIX_SEP;
+ ++buf;
+ buf[0] = 0;
+ prefix_list = TREE_CHAIN (prefix_list);
+ }
+}
+
+/* Warn when a variable's address is used to form a memory address
+   that will consume an extra register during reload.  */
+
+static void
+iasm_warn_extra_reg (tree arg)
+{
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL
+ || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL))
+ warning (0, "addressing mode too complex, will consume an extra register");
+}
+
+bool
+iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses,
+ bool must_be_reg, bool must_not_be_reg, void *ep)
+{
+ iasm_md_extra_info *e = ep;
+ switch (TREE_CODE (arg))
+ {
+ case BRACKET_EXPR:
+ {
+ tree op1 = TREE_OPERAND (arg, 0);
+ tree op2 = TREE_OPERAND (arg, 1);
+ tree op0 = NULL_TREE, op3 = NULL_TREE;
+ tree scale = NULL_TREE;
+
+ if (op2 == NULL_TREE
+ && TREE_TYPE (op1)
+ && POINTER_TYPE_P (TREE_TYPE (op1)))
+ {
+ /* Let the normal operand printer output this without trying to
+ decompose it into parts so that things like (%esp + 20) + 4
+ can be output as 24(%esp) by the optimizer instead of 4(%0)
+ and burning an "R" with (%esp + 20). */
+ iasm_force_constraint ("m", e);
+ iasm_get_register_var (op1, "", buf, argnum, must_be_reg, e);
+ iasm_force_constraint (0, e);
+ break;
+ }
+
+ if (op2
+ && TREE_CODE (op2) == BRACKET_EXPR)
+ {
+ op3 = TREE_OPERAND (op2, 1);
+ op2 = TREE_OPERAND (op2, 0);
+ if (TREE_CODE (op2) == BRACKET_EXPR)
+ {
+ op0 = TREE_OPERAND (op2, 1);
+ op2 = TREE_OPERAND (op2, 0);
+ }
+ }
+ if (op0)
+ return false;
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "[");
+
+ if (op3 == NULL_TREE
+ && op2 && TREE_CODE (op2) == PLUS_EXPR)
+ {
+ op3 = TREE_OPERAND (op2, 0);
+ op2 = TREE_OPERAND (op2, 1);
+ }
+ if (op2 && TREE_CODE (op2) == MULT_EXPR)
+ {
+ tree t;
+ t = op3;
+ op3 = op2;
+ op2 = t;
+ }
+
+ /* Crack out the scaling, if any. */
+ if (ASSEMBLER_DIALECT == ASM_ATT
+ && op3
+ && TREE_CODE (op3) == MULT_EXPR)
+ {
+ if (TREE_CODE (TREE_OPERAND (op3, 1)) == INTEGER_CST)
+ {
+ scale = TREE_OPERAND (op3, 1);
+ op3 = TREE_OPERAND (op3, 0);
+ }
+ else if (TREE_CODE (TREE_OPERAND (op3, 0)) == INTEGER_CST)
+ {
+ scale = TREE_OPERAND (op3, 0);
+ op3 = TREE_OPERAND (op3, 1);
+ }
+ }
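+	/* Illustratively (an assumption drawn from the code above), an
+	   operand like [ebx + ecx*4 + 8] has now been decomposed into
+	   op1 = 8, op2 = ebx, op3 = ecx and scale = 4, so that it can
+	   print as "8(%ebx,%ecx,4)" in AT&T syntax.  */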
+
+ /* Complicated expression as JMP or CALL target. */
+      if (e->modifier && strcmp (e->modifier, "A") == 0)
+ {
+ strcat (buf, "*");
+ e->modifier = 0;
+ }
+ e->as_immediate = true;
+ iasm_print_operand (buf, op1, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ e->as_immediate = false;
+
+ /* Just an immediate. */
+ if (op2 == NULL_TREE && op3 == NULL_TREE)
+ break;
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "]");
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "[");
+ else
+ strcat (buf, "(");
+
+ if (op2)
+ {
+ /* We know by context, this has to be an R. */
+ iasm_force_constraint ("R", e);
+ iasm_warn_extra_reg (op2);
+ iasm_print_operand (buf, op2, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ iasm_force_constraint (0, e);
+ }
+ if (op3)
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "][");
+ else
+ strcat (buf, ",");
+
+ /* We know by context, this has to be an l. */
+ iasm_force_constraint ("l", e);
+ iasm_warn_extra_reg (op3);
+ iasm_print_operand (buf, op3, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ iasm_force_constraint (0, e);
+ if (scale)
+ {
+ strcat (buf, ",");
+ e->as_immediate = true;
+ iasm_print_operand (buf, scale, argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ e->as_immediate = false;
+ }
+ }
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ strcat (buf, "]");
+ else
+ strcat (buf, ")");
+ }
+ break;
+
+ case ADDR_EXPR:
+ if ((TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF
+ || TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
+ && ! e->as_immediate)
+ {
+ iasm_get_register_var (arg, "", buf, argnum, must_be_reg, e);
+ break;
+ }
+ if (! e->as_immediate)
+ e->as_offset = true;
+ iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ e->as_offset = false;
+ break;
+
+ case MULT_EXPR:
+ iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ strcat (buf, "*");
+ iasm_print_operand (buf, TREE_OPERAND (arg, 1), argnum, uses,
+ must_be_reg, must_not_be_reg, e);
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+/* APPLE LOCAL end CW asm blocks */
+
+/* Return the mangling of TYPE if it is an extended fundamental type. */
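+/* Illustrative example (assuming the Itanium C++ ABI mangling used by
+   GCC): with this hook, "void f (__float128)" mangles as "_Z1fg" and
+   "void f (__float80)" as "_Z1fe".  */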
+
+static const char *
+/* APPLE LOCAL mangle_type 7105099 */
+ix86_mangle_type (tree type)
+{
+ /* APPLE LOCAL begin mangle_type 7105099 */
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ /* APPLE LOCAL end mangle_type 7105099 */
+ switch (TYPE_MODE (type))
+ {
+ case TFmode:
+ /* __float128 is "g". */
+ return "g";
+ case XFmode:
+ /* "long double" or __float80 is "e". */
+ return "e";
+ default:
+ return NULL;
+ }
+}
+
+/* For 32-bit code we can save PIC register setup by using
+ __stack_chk_fail_local hidden function instead of calling
+ __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
+ register, so it is better to call __stack_chk_fail directly. */
+
+static tree
+ix86_stack_protect_fail (void)
+{
+ return TARGET_64BIT
+ ? default_external_stack_protect_fail ()
+ : default_hidden_stack_protect_fail ();
+}
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such, I
+ guess we'll have to see. */
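+/* Illustrative values (assuming the standard DWARF EH encoding
+   macros): for 32-bit PIC code this returns
+   DW_EH_PE_pcrel | DW_EH_PE_sdata4 for local data, adding
+   DW_EH_PE_indirect when GLOBAL is set.  */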
+int
+asm_preferred_eh_data_format (int code, int global)
+{
+ if (flag_pic)
+ {
+ int type = DW_EH_PE_sdata8;
+ if (!TARGET_64BIT
+ || ix86_cmodel == CM_SMALL_PIC
+ || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
+ type = DW_EH_PE_sdata4;
+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+ }
+ if (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM && code))
+ return DW_EH_PE_udata4;
+ return DW_EH_PE_absptr;
+}
+
+#include "gt-i386.h"