Diffstat (limited to 'gcc-4.2.1-5666.3/gcc/config/i386/i386.c')
-rw-r--r--  gcc-4.2.1-5666.3/gcc/config/i386/i386.c | 23515 -
1 file changed, 0 insertions(+), 23515 deletions(-)
diff --git a/gcc-4.2.1-5666.3/gcc/config/i386/i386.c b/gcc-4.2.1-5666.3/gcc/config/i386/i386.c
deleted file mode 100644
index 0e212967a..000000000
--- a/gcc-4.2.1-5666.3/gcc/config/i386/i386.c
+++ /dev/null
@@ -1,23515 +0,0 @@
-/* Subroutines used for code generation on IA-32.
- Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "rtl.h"
-#include "tree.h"
-#include "tm_p.h"
-#include "regs.h"
-#include "hard-reg-set.h"
-#include "real.h"
-#include "insn-config.h"
-#include "conditions.h"
-#include "output.h"
-#include "insn-codes.h"
-#include "insn-attr.h"
-#include "flags.h"
-#include "except.h"
-#include "function.h"
-#include "recog.h"
-#include "expr.h"
-#include "optabs.h"
-#include "toplev.h"
-#include "basic-block.h"
-#include "ggc.h"
-#include "target.h"
-#include "target-def.h"
-#include "langhooks.h"
-#include "cgraph.h"
-#include "tree-gimple.h"
-#include "dwarf2.h"
-#include "tm-constrs.h"
-
-/* APPLE LOCAL begin pascal strings */
-#include "../../libcpp/internal.h"
-extern struct cpp_reader* parse_in;
-/* APPLE LOCAL end pascal strings */
-/* APPLE LOCAL begin regparmandstackparm */
-#include "integrate.h"
-#include "tree-inline.h"
-#include "splay-tree.h"
-#include "tree-pass.h"
-#include "c-tree.h"
-#include "c-common.h"
-/* APPLE LOCAL end regparmandstackparm */
-/* APPLE LOCAL begin dwarf call/pop 5221468 */
-#include "debug.h"
-#include "dwarf2out.h"
-/* APPLE LOCAL end dwarf call/pop 5221468 */
-
-#ifndef CHECK_STACK_LIMIT
-#define CHECK_STACK_LIMIT (-1)
-#endif
-
-/* Return index of given mode in mult and division cost tables. */
-#define MODE_INDEX(mode) \
- ((mode) == QImode ? 0 \
- : (mode) == HImode ? 1 \
- : (mode) == SImode ? 2 \
- : (mode) == DImode ? 3 \
- : 4)
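The multiply and divide tables in each processor_costs structure below carry five entries indexed exactly this way. A minimal, self-contained sketch of the lookup pattern (the enum, the table values, and the driver are illustrative stand-ins, not GCC's machine modes or cost struct):

#include <stdio.h>

enum mode { QImode, HImode, SImode, DImode, OTHERmode };

#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0  \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

int
main (void)
{
  /* Hypothetical per-mode divide costs (QI, HI, SI, DI, other),
     shaped like the divide tables in the cost structures below.  */
  static const int divide_cost[5] = { 18, 26, 42, 74, 74 };

  printf ("SImode divide cost: %d\n", divide_cost[MODE_INDEX (SImode)]);
  return 0;
}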
-
-/* Processor costs (relative to an add) */
-/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
-#define COSTS_N_BYTES(N) ((N) * 2)
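As a worked check of that assumption: a one-instruction add costs COSTS_N_INSNS (1) = 4 units on the time scale, while the same 2-byte add costs COSTS_N_BYTES (2) = 4 units on the size scale, so values from the two scales stay directly comparable in the tables that follow.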
-
-static const
-struct processor_costs size_cost = { /* costs for tuning for size */
- COSTS_N_BYTES (2), /* cost of an add instruction */
- COSTS_N_BYTES (3), /* cost of a lea instruction */
- COSTS_N_BYTES (2), /* variable shift costs */
- COSTS_N_BYTES (3), /* constant shift costs */
- {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
- COSTS_N_BYTES (3), /* cost of movsx */
- COSTS_N_BYTES (3), /* cost of movzx */
- 0, /* "large" insn */
- 2, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {2, 2, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 2}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {2, 2, 2}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 3, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {3, 3}, /* cost of storing MMX registers
- in SImode and DImode */
- 3, /* cost of moving SSE register */
- {3, 3, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {3, 3, 3}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
- COSTS_N_BYTES (2), /* cost of FMUL instruction. */
- COSTS_N_BYTES (2), /* cost of FDIV instruction. */
- COSTS_N_BYTES (2), /* cost of FABS instruction. */
- COSTS_N_BYTES (2), /* cost of FCHS instruction. */
- COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
-};
-
-/* Processor costs (relative to an add) */
-static const
-struct processor_costs i386_cost = { /* 386 specific costs */
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (3), /* variable shift costs */
- COSTS_N_INSNS (2), /* constant shift costs */
- {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
- COSTS_N_INSNS (6), /* HI */
- COSTS_N_INSNS (6), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
- COSTS_N_INSNS (1), /* cost of multiply per each bit set */
- {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (23), /* SI */
- COSTS_N_INSNS (23), /* DI */
- COSTS_N_INSNS (23)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 15, /* "large" insn */
- 3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (27), /* cost of FMUL instruction. */
- COSTS_N_INSNS (88), /* cost of FDIV instruction. */
- COSTS_N_INSNS (22), /* cost of FABS instruction. */
- COSTS_N_INSNS (24), /* cost of FCHS instruction. */
- COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs i486_cost = { /* 486 specific costs */
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (3), /* variable shift costs */
- COSTS_N_INSNS (2), /* constant shift costs */
- {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
- COSTS_N_INSNS (12), /* HI */
- COSTS_N_INSNS (12), /* SI */
- COSTS_N_INSNS (12), /* DI */
- COSTS_N_INSNS (12)}, /* other */
- 1, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (40), /* HI */
- COSTS_N_INSNS (40), /* SI */
- COSTS_N_INSNS (40), /* DI */
- COSTS_N_INSNS (40)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 15, /* "large" insn */
- 3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (16), /* cost of FMUL instruction. */
- COSTS_N_INSNS (73), /* cost of FDIV instruction. */
- COSTS_N_INSNS (3), /* cost of FABS instruction. */
- COSTS_N_INSNS (3), /* cost of FCHS instruction. */
- COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs pentium_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (4), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
- COSTS_N_INSNS (11), /* HI */
- COSTS_N_INSNS (11), /* SI */
- COSTS_N_INSNS (11), /* DI */
- COSTS_N_INSNS (11)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (25), /* HI */
- COSTS_N_INSNS (25), /* SI */
- COSTS_N_INSNS (25), /* DI */
- COSTS_N_INSNS (25)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 8, /* "large" insn */
- 6, /* MOVE_RATIO */
- 6, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (3), /* cost of FMUL instruction. */
- COSTS_N_INSNS (39), /* cost of FDIV instruction. */
- COSTS_N_INSNS (1), /* cost of FABS instruction. */
- COSTS_N_INSNS (1), /* cost of FCHS instruction. */
- COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs pentiumpro_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (4)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (17), /* HI */
- COSTS_N_INSNS (17), /* SI */
- COSTS_N_INSNS (17), /* DI */
- COSTS_N_INSNS (17)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 32, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (5), /* cost of FMUL instruction. */
- COSTS_N_INSNS (56), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs k6_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (18), /* HI */
- COSTS_N_INSNS (18), /* SI */
- COSTS_N_INSNS (18), /* DI */
- COSTS_N_INSNS (18)}, /* other */
- COSTS_N_INSNS (2), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 8, /* "large" insn */
- 4, /* MOVE_RATIO */
- 3, /* cost for loading QImode using movzbl */
- {4, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 3, 2}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 6, /* MMX or SSE register to integer */
- 32, /* size of prefetch block */
- 1, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (2), /* cost of FMUL instruction. */
- COSTS_N_INSNS (56), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs athlon_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
- COSTS_N_INSNS (5), /* HI */
- COSTS_N_INSNS (5), /* SI */
- COSTS_N_INSNS (5), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 5, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (24), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs k8_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 3, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 5, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (19), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs pentium4_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (3), /* cost of a lea instruction */
- COSTS_N_INSNS (4), /* variable shift costs */
- COSTS_N_INSNS (4), /* constant shift costs */
- {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
- COSTS_N_INSNS (15), /* HI */
- COSTS_N_INSNS (15), /* SI */
- COSTS_N_INSNS (15), /* DI */
- COSTS_N_INSNS (15)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (56), /* HI */
- COSTS_N_INSNS (56), /* SI */
- COSTS_N_INSNS (56), /* DI */
- COSTS_N_INSNS (56)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 16, /* "large" insn */
- 6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {4, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 3, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 12, /* cost of moving SSE register */
- {12, 12, 12}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 10, /* MMX or SSE register to integer */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (7), /* cost of FMUL instruction. */
- COSTS_N_INSNS (43), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
-};
-
-static const
-struct processor_costs nocona_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
- COSTS_N_INSNS (10), /* HI */
- COSTS_N_INSNS (10), /* SI */
- COSTS_N_INSNS (10), /* DI */
- COSTS_N_INSNS (10)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (66), /* HI */
- COSTS_N_INSNS (66), /* SI */
- COSTS_N_INSNS (66), /* DI */
- COSTS_N_INSNS (66)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 16, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 3, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 6, /* cost of moving MMX register */
- {12, 12}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 6, /* cost of moving SSE register */
- {12, 12, 12}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {12, 12, 12}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 8, /* MMX or SSE register to integer */
- 128, /* size of prefetch block */
- 8, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (40), /* cost of FDIV instruction. */
- COSTS_N_INSNS (3), /* cost of FABS instruction. */
- COSTS_N_INSNS (3), /* cost of FCHS instruction. */
- COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
-};
-/* APPLE LOCAL begin mainline */
-static const
-struct processor_costs core2_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (22), /* HI */
- COSTS_N_INSNS (22), /* SI */
- COSTS_N_INSNS (22), /* DI */
- COSTS_N_INSNS (22)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 16, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {6, 6, 6}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {6, 6, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- 128, /* size of prefetch block */
- 8, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (5), /* cost of FMUL instruction. */
- COSTS_N_INSNS (32), /* cost of FDIV instruction. */
- COSTS_N_INSNS (1), /* cost of FABS instruction. */
- COSTS_N_INSNS (1), /* cost of FCHS instruction. */
- COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
-};
-/* APPLE LOCAL end mainline */
-/* Generic64 should produce code tuned for Nocona and K8. */
-static const
-struct processor_costs generic64_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- /* On all chips taken into consideration, lea is 2 cycles or more.  With
-    this cost, however, our current implementation of synth_mult uses
-    unnecessary temporary registers, causing regressions on several
-    SPECfp benchmarks.  */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
-    value is increased to the perhaps more appropriate value of 5.  */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
-};
-
-/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
-static const
-struct processor_costs generic32_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
-};
-
-const struct processor_costs *ix86_cost = &pentium_cost;
-
-/* Processor feature/optimization bitmasks. */
-#define m_386 (1<<PROCESSOR_I386)
-#define m_486 (1<<PROCESSOR_I486)
-#define m_PENT (1<<PROCESSOR_PENTIUM)
-#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
-#define m_K6 (1<<PROCESSOR_K6)
-#define m_ATHLON (1<<PROCESSOR_ATHLON)
-#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
-#define m_K8 (1<<PROCESSOR_K8)
-#define m_ATHLON_K8 (m_K8 | m_ATHLON)
-#define m_NOCONA (1<<PROCESSOR_NOCONA)
-/* APPLE LOCAL mainline */
-#define m_CORE2 (1<<PROCESSOR_CORE2)
-#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
-#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
-#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
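In i386.h (not part of this diff) each of the feature masks defined below is tested against a one-hot bit derived from the selected tuning target. A self-contained model of that test, with an abbreviated enum and only a few of the masks, standing in for the real macros:

#include <stdio.h>

enum processor_type { PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM,
                      PROCESSOR_PENTIUMPRO, PROCESSOR_K6 };

#define m_386  (1 << PROCESSOR_I386)
#define m_486  (1 << PROCESSOR_I486)
#define m_PENT (1 << PROCESSOR_PENTIUM)

static const int x86_zero_extend_with_and = m_486 | m_PENT;

int
main (void)
{
  enum processor_type ix86_tune = PROCESSOR_PENTIUM;  /* tuning target */
  int tunemask = 1 << ix86_tune;  /* one-hot bit for the tuned CPU */

  if (x86_zero_extend_with_and & tunemask)
    printf ("tune: zero-extend via AND\n");
  else
    printf ("tune: zero-extend via MOVZX\n");
  return 0;
}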
-
-/* Generic instruction choice should be a common subset of supported CPUs
-   (PPro/PENT4/NOCONA/Athlon/K8).  */
-
-/* Using leave does not affect Nocona SPEC2000 results negatively, so enabling
-   it for Generic64 seems like a good code-size tradeoff.  We can't enable it
-   for 32-bit generic because it does not work well with PPro-based chips.  */
-/* APPLE LOCAL begin mainline */
-const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
-const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_zero_extend_with_and = m_486 | m_PENT;
-const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC /* m_386 | m_K6 */;
-const int x86_double_with_add = ~m_386;
-const int x86_use_bit_test = m_386;
-const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
-const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
-const int x86_3dnow_a = m_ATHLON_K8;
-/* APPLE LOCAL end mainline */
-const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
-/* Branch hints were put in P4 based on simulation results.  But after P4
-   was made, no performance benefit was observed from branch hints.  They
-   also increase code size.  As a result, icc never generates branch hints.  */
-const int x86_branch_hints = 0;
-const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
-/* We probably ought to watch for partial register stalls on the Generic32
-   compilation setting as well.  However, in the current implementation
-   partial register stalls are not eliminated very well - they can be
-   introduced via subregs synthesized by combine and can occur in
-   caller/callee saving sequences.
-   Because this option pays back little on PPro-based chips and conflicts
-   with the partial register dependencies used by Athlon/P4-based chips, it
-   is better to leave it off for generic32 for now.  */
-const int x86_partial_reg_stall = m_PPRO;
-/* APPLE LOCAL begin mainline */
-const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
-const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
-const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
-const int x86_use_mov0 = m_K6;
-const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
-const int x86_read_modify_write = ~m_PENT;
-const int x86_read_modify = ~(m_PENT | m_PPRO);
-const int x86_split_long_moves = m_PPRO;
-const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
-const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
-const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
-const int x86_qimode_math = ~(0);
-const int x86_promote_qi_regs = 0;
-/* On PPro this flag is meant to avoid partial register stalls.  Just like
-   x86_partial_reg_stall, this option might be considered for Generic32 if
-   our scheme for avoiding partial stalls were more effective.  */
-const int x86_himode_math = ~(m_PPRO);
-const int x86_promote_hi_regs = m_PPRO;
-const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 |m_GENERIC;
-const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC);
-const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
-const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
-const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
-const int x86_shift1 = ~m_486;
-const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-/* In the Generic model we have a conflict here between PPro/Pentium4-based
-   chips that treat 128-bit SSE registers as single units and K8-based chips
-   that divide SSE registers into two 64-bit halves.
-   x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
-   to allow register renaming on 128-bit SSE units, but usually results in one
-   extra microop on 64-bit SSE units.  Experimental results show that
-   disabling this option on P4 brings over 20% SPECfp regression, while
-   enabling it on K8 brings roughly 2.4% regression that can be partly masked
-   by careful scheduling of moves.  */
-const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
-/* Set for machines where the type and dependencies are resolved on SSE
- register parts instead of whole registers, so we may maintain just
- lower part of scalar values in proper format leaving the upper part
- undefined. */
-const int x86_sse_split_regs = m_ATHLON_K8;
-const int x86_sse_typeless_stores = m_ATHLON_K8;
-const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
-const int x86_use_ffreep = m_ATHLON_K8;
-const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
-const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
-
-/* ??? Allowing interunit moves makes it all too easy for the compiler to put
-   integer data in xmm registers, which results in pretty abysmal code.  */
-/* APPLE LOCAL 5612787 mainline sse4 */
-const int x86_inter_unit_moves = ~(m_ATHLON_K8);
-
-const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
-/* Some CPU cores are not able to predict more than 4 branch instructions
-   in the 16-byte window.  */
-const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_CORE2 | m_GENERIC;
-const int x86_use_bt = m_ATHLON_K8;
-/* APPLE LOCAL begin */
-/* See comment in darwin override options for what needs fixing.
- Most of this code has been rewritten in mainline anyhow.
- All we've done here is remove the const since we assign to
- them in SUBTARGET_OVERRIDE_OPTIONS. */
-/* Compare and exchange was added for 80486. */
-int x86_cmpxchg = ~m_386;
-/* Compare and exchange 8 bytes was added for pentium. */
-int x86_cmpxchg8b = ~(m_386 | m_486);
-/* Compare and exchange 16 bytes was added for nocona. */
-/* APPLE LOCAL mainline */
-int x86_cmpxchg16b = m_NOCONA | m_CORE2;
-/* Exchange and add was added for 80486. */
-int x86_xadd = ~m_386;
-/* APPLE LOCAL begin mainline bswap */
-/* Byteswap was added for 80486. */
-int x86_bswap = ~m_386;
-/* APPLE LOCAL end mainline bswap */
-/* APPLE LOCAL end */
-const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
-/* APPLE LOCAL end mainline */
-
-/* In case the average insn count for a single function invocation is
-   lower than this constant, emit fast (but longer) prologue and
-   epilogue code.  */
-#define FAST_PROLOGUE_INSN_COUNT 20
-
-/* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
-static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
-static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
-static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
-
-/* Array of the smallest class containing reg number REGNO, indexed by
- REGNO. Used by REGNO_REG_CLASS in i386.h. */
-
-enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
-{
- /* ax, dx, cx, bx */
- AREG, DREG, CREG, BREG,
- /* si, di, bp, sp */
- SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
- /* FP registers */
- FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
- FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
- /* arg pointer */
- NON_Q_REGS,
- /* flags, fpsr, dirflag, frame */
- NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
- SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
- SSE_REGS, SSE_REGS,
- MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
- MMX_REGS, MMX_REGS,
- NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
- NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
- SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
- SSE_REGS, SSE_REGS,
-};
-
-/* The "default" register map used in 32bit mode. */
-
-int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
-{
- 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
- 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
- -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
- 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
- 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
- -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
- -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
-};
-
-static int const x86_64_int_parameter_registers[6] =
-{
- 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
- FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
-};
-
-static int const x86_64_int_return_registers[4] =
-{
- 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
-};
-
-/* The "default" register map used in 64bit mode. */
-int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
-{
- 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
- 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
- -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
- 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
- 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
- 8,9,10,11,12,13,14,15, /* extended integer registers */
- 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
-};
-
-/* Define the register numbers to be used in Dwarf debugging information.
- The SVR4 reference port C compiler uses the following register numbers
- in its Dwarf output code:
- 0 for %eax (gcc regno = 0)
- 1 for %ecx (gcc regno = 2)
- 2 for %edx (gcc regno = 1)
- 3 for %ebx (gcc regno = 3)
- 4 for %esp (gcc regno = 7)
- 5 for %ebp (gcc regno = 6)
- 6 for %esi (gcc regno = 4)
- 7 for %edi (gcc regno = 5)
- The following three DWARF register numbers are never generated by
- the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
- believes these numbers have these meanings.
- 8 for %eip (no gcc equivalent)
- 9 for %eflags (gcc regno = 17)
- 10 for %trapno (no gcc equivalent)
- It is not at all clear how we should number the FP stack registers
- for the x86 architecture. If the version of SDB on x86/svr4 were
- a bit less brain dead with respect to floating-point then we would
- have a precedent to follow with respect to DWARF register numbers
- for x86 FP registers, but the SDB on x86/svr4 is so completely
- broken with respect to FP registers that it is hardly worth thinking
- of it as something to strive for compatibility with.
- The version of x86/svr4 SDB I have at the moment does (partially)
- seem to believe that DWARF register number 11 is associated with
- the x86 register %st(0), but that's about all. Higher DWARF
- register numbers don't seem to be associated with anything in
- particular, and even for DWARF regno 11, SDB only seems to under-
- stand that it should say that a variable lives in %st(0) (when
- asked via an `=' command) if we said it was in DWARF regno 11,
- but SDB still prints garbage when asked for the value of the
- variable in question (via a `/' command).
- (Also note that the labels SDB prints for various FP stack regs
- when doing an `x' command are all wrong.)
- Note that these problems generally don't affect the native SVR4
- C compiler because it doesn't allow the use of -O with -g and
- because when it is *not* optimizing, it allocates a memory
- location for each floating-point variable, and the memory
- location is what gets described in the DWARF AT_location
- attribute for the variable in question.
- Regardless of the severe mental illness of the x86/svr4 SDB, we
- do something sensible here and we use the following DWARF
- register numbers. Note that these are all stack-top-relative
- numbers.
- 11 for %st(0) (gcc regno = 8)
- 12 for %st(1) (gcc regno = 9)
- 13 for %st(2) (gcc regno = 10)
- 14 for %st(3) (gcc regno = 11)
- 15 for %st(4) (gcc regno = 12)
- 16 for %st(5) (gcc regno = 13)
- 17 for %st(6) (gcc regno = 14)
- 18 for %st(7) (gcc regno = 15)
-*/
-int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
-{
- 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
- 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
- -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
- 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
- 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
- -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
- -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
-};
-
-/* Test and compare insns in i386.md store the information needed to
- generate branch and scc insns here. */
-
-rtx ix86_compare_op0 = NULL_RTX;
-rtx ix86_compare_op1 = NULL_RTX;
-rtx ix86_compare_emitted = NULL_RTX;
-
-/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
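Plugging in the 64-bit ABI's usual values (REGPARM_MAX = 6 integer registers of UNITS_PER_WORD = 8 bytes each, SSE_REGPARM_MAX = 8 SSE registers of 16 bytes each) gives 6*8 + 8*16 = 176 bytes, the familiar size of the x86-64 va_arg register save area.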
-
-/* Define the structure for the machine field in struct function. */
-
-struct stack_local_entry GTY(())
-{
- unsigned short mode;
- unsigned short n;
- rtx rtl;
- struct stack_local_entry *next;
-};
-
-/* Structure describing stack frame layout.
- Stack grows downward:
-
- [arguments]
- <- ARG_POINTER
- saved pc
-
- saved frame pointer if frame_pointer_needed
- <- HARD_FRAME_POINTER
- [saved regs]
-
- [padding1] \
- )
- [va_arg registers] (
- > to_allocate <- FRAME_POINTER
- [frame] (
- )
- [padding2] /
- */
-struct ix86_frame
-{
- int nregs;
- int padding1;
- int va_arg_size;
- HOST_WIDE_INT frame;
- int padding2;
- int outgoing_arguments_size;
- int red_zone_size;
-
- HOST_WIDE_INT to_allocate;
- /* The offsets relative to ARG_POINTER. */
- HOST_WIDE_INT frame_pointer_offset;
- HOST_WIDE_INT hard_frame_pointer_offset;
- HOST_WIDE_INT stack_pointer_offset;
-
- /* When save_regs_using_mov is set, emit prologue using
- move instead of push instructions. */
- bool save_regs_using_mov;
-};
-
-/* Code model option. */
-enum cmodel ix86_cmodel;
-/* Asm dialect. */
-enum asm_dialect ix86_asm_dialect = ASM_ATT;
-/* TLS dialects. */
-enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
-
-/* Which unit we are generating floating point math for. */
-enum fpmath_unit ix86_fpmath;
-
-/* Which CPU we are scheduling for.  */
-enum processor_type ix86_tune;
-/* Which instruction set architecture to use. */
-enum processor_type ix86_arch;
-
-/* True if the SSE prefetch instruction is not a NOP.  */
-int x86_prefetch_sse;
-
-/* ix86_regparm_string as a number */
-static int ix86_regparm;
-
-/* APPLE LOCAL begin 5612787 mainline sse4 */
-/* True if SSE population count insn supported. */
-int x86_popcnt;
-/* APPLE LOCAL end 5612787 mainline sse4 */
-
-/* -mstackrealign option */
-extern int ix86_force_align_arg_pointer;
-static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
-
-/* Preferred alignment for stack boundary in bits. */
-unsigned int ix86_preferred_stack_boundary;
-/* APPLE LOCAL begin radar 4216496, 4229407, 4120689, 4095567 */
-unsigned int ix86_save_preferred_stack_boundary;
-/* APPLE LOCAL end radar 4216496, 4229407, 4120689, 4095567 */
-
-/* Values 1-5: see jump.c */
-int ix86_branch_cost;
-
-/* Variables which are this size or smaller are put in the data/bss
- or ldata/lbss sections. */
-
-int ix86_section_threshold = 65536;
-
-/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
-char internal_label_prefix[16];
-int internal_label_prefix_len;
-
-static bool ix86_handle_option (size_t, const char *, int);
-static void output_pic_addr_const (FILE *, rtx, int);
-static void put_condition_code (enum rtx_code, enum machine_mode,
- int, int, FILE *);
-static const char *get_some_local_dynamic_name (void);
-static int get_some_local_dynamic_name_1 (rtx *, void *);
-static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
-static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
- rtx *);
-static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
-static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
- enum machine_mode);
-static rtx get_thread_pointer (int);
-static rtx legitimize_tls_address (rtx, enum tls_model, int);
-static void get_pc_thunk_name (char [32], unsigned int);
-static rtx gen_push (rtx);
-static int ix86_flags_dependent (rtx, rtx, enum attr_type);
-static int ix86_agi_dependent (rtx, rtx, enum attr_type);
-static struct machine_function * ix86_init_machine_status (void);
-static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
-static int ix86_nsaved_regs (void);
-static void ix86_emit_save_regs (void);
-static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
-static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
-static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
-static HOST_WIDE_INT ix86_GOT_alias_set (void);
-static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
-static rtx ix86_expand_aligntest (rtx, int);
-static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
-static int ix86_issue_rate (void);
-static int ix86_adjust_cost (rtx, rtx, rtx, int);
-static int ia32_multipass_dfa_lookahead (void);
-static void ix86_init_mmx_sse_builtins (void);
-static rtx x86_this_parameter (tree);
-static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
- HOST_WIDE_INT, tree);
-static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
-static void x86_file_start (void);
-static void ix86_reorg (void);
-static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
-static tree ix86_build_builtin_va_list (void);
-static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
- tree, int *, int);
-static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
-static bool ix86_scalar_mode_supported_p (enum machine_mode);
-static bool ix86_vector_mode_supported_p (enum machine_mode);
-
-static int ix86_address_cost (rtx);
-static bool ix86_cannot_force_const_mem (rtx);
-static rtx ix86_delegitimize_address (rtx);
-
-static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
-
-struct builtin_description;
-static rtx ix86_expand_sse_comi (const struct builtin_description *,
- tree, rtx);
-static rtx ix86_expand_sse_compare (const struct builtin_description *,
- tree, rtx);
-static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
-static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
-static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
-static rtx ix86_expand_store_builtin (enum insn_code, tree);
-static rtx safe_vector_operand (rtx, enum machine_mode);
-static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
-static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
-static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
-static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
-static int ix86_fp_comparison_cost (enum rtx_code code);
-static unsigned int ix86_select_alt_pic_regnum (void);
-static int ix86_save_reg (unsigned int, int);
-static void ix86_compute_frame_layout (struct ix86_frame *);
-static int ix86_comp_type_attributes (tree, tree);
-static int ix86_function_regparm (tree, tree);
-const struct attribute_spec ix86_attribute_table[];
-static bool ix86_function_ok_for_sibcall (tree, tree);
-static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
-static int ix86_value_regno (enum machine_mode, tree, tree);
-static bool contains_128bit_aligned_vector_p (tree);
-static rtx ix86_struct_value_rtx (tree, int);
-static bool ix86_ms_bitfield_layout_p (tree);
-static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
-static int extended_reg_mentioned_1 (rtx *, void *);
-static bool ix86_rtx_costs (rtx, int, int, int *);
-static int min_insn_size (rtx);
-static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
-static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
-static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
- tree, bool);
-static void ix86_init_builtins (void);
-static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
-/* APPLE LOCAL mangle_type 7105099 */
-static const char *ix86_mangle_type (tree);
-static tree ix86_stack_protect_fail (void);
-static rtx ix86_internal_arg_pointer (void);
-static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
-
-/* This function is only used on Solaris. */
-static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
- ATTRIBUTE_UNUSED;
-
-/* Register class used for passing a given 64-bit part of the argument.
-   These represent classes as documented by the psABI, with the exception
-   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
-   will just use an SFmode or DFmode move instead of DImode to avoid
-   reformatting penalties.
-
-   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
-   moves whenever possible (the upper half does contain padding).
- */
-enum x86_64_reg_class
- {
- X86_64_NO_CLASS,
- X86_64_INTEGER_CLASS,
- X86_64_INTEGERSI_CLASS,
- X86_64_SSE_CLASS,
- X86_64_SSESF_CLASS,
- X86_64_SSEDF_CLASS,
- X86_64_SSEUP_CLASS,
- X86_64_X87_CLASS,
- X86_64_X87UP_CLASS,
- X86_64_COMPLEX_X87_CLASS,
- X86_64_MEMORY_CLASS
- };
-static const char * const x86_64_reg_class_name[] = {
- "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
- "sseup", "x87", "x87up", "cplx87", "no"
-};
-
-#define MAX_CLASSES 4
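As a hand-worked illustration of these classes under the x86-64 psABI's eightbyte rules (an editorial example, not output of GCC's classifier): a bare float classifies as X86_64_SSESF_CLASS and a bare int as X86_64_INTEGERSI_CLASS (each fills only half of its eightbyte), while struct { double d; long l; } splits into two eightbytes classified X86_64_SSEDF_CLASS and X86_64_INTEGER_CLASS, so the double travels in an SSE register and the long in a general-purpose register.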
-
-/* Table of constants used by fldpi, fldln2, etc.... */
-static REAL_VALUE_TYPE ext_80387_constants_table [5];
-static bool ext_80387_constants_init = 0;
-static void init_ext_80387_constants (void);
-static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
-static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
-static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
-static section *x86_64_elf_select_section (tree decl, int reloc,
- unsigned HOST_WIDE_INT align)
- ATTRIBUTE_UNUSED;
-
-/* Initialize the GCC target structure. */
-#undef TARGET_ATTRIBUTE_TABLE
-#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
-#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
-# undef TARGET_MERGE_DECL_ATTRIBUTES
-# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
-#endif
-
-#undef TARGET_COMP_TYPE_ATTRIBUTES
-#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
-
-#undef TARGET_INIT_BUILTINS
-#define TARGET_INIT_BUILTINS ix86_init_builtins
-#undef TARGET_EXPAND_BUILTIN
-#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
-
-#undef TARGET_ASM_FUNCTION_EPILOGUE
-#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
-
-#undef TARGET_ENCODE_SECTION_INFO
-#ifndef SUBTARGET_ENCODE_SECTION_INFO
-#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
-#else
-#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
-#endif
-
-#undef TARGET_ASM_OPEN_PAREN
-#define TARGET_ASM_OPEN_PAREN ""
-#undef TARGET_ASM_CLOSE_PAREN
-#define TARGET_ASM_CLOSE_PAREN ""
-
-#undef TARGET_ASM_ALIGNED_HI_OP
-#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
-#undef TARGET_ASM_ALIGNED_SI_OP
-#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
-#ifdef ASM_QUAD
-#undef TARGET_ASM_ALIGNED_DI_OP
-#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
-#endif
-
-#undef TARGET_ASM_UNALIGNED_HI_OP
-#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
-#undef TARGET_ASM_UNALIGNED_SI_OP
-#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
-#undef TARGET_ASM_UNALIGNED_DI_OP
-#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
-
-#undef TARGET_SCHED_ADJUST_COST
-#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
-#undef TARGET_SCHED_ISSUE_RATE
-#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
-#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
-#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
- ia32_multipass_dfa_lookahead
-
-#undef TARGET_FUNCTION_OK_FOR_SIBCALL
-#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
-
-#ifdef HAVE_AS_TLS
-#undef TARGET_HAVE_TLS
-#define TARGET_HAVE_TLS true
-#endif
-#undef TARGET_CANNOT_FORCE_CONST_MEM
-#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
-#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
-#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
-
-#undef TARGET_DELEGITIMIZE_ADDRESS
-#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
-
-#undef TARGET_MS_BITFIELD_LAYOUT_P
-#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
-
-#if TARGET_MACHO
-#undef TARGET_BINDS_LOCAL_P
-#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
-#endif
-
-#undef TARGET_ASM_OUTPUT_MI_THUNK
-#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
-#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
-#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
-
-#undef TARGET_ASM_FILE_START
-#define TARGET_ASM_FILE_START x86_file_start
-
-#undef TARGET_DEFAULT_TARGET_FLAGS
-#define TARGET_DEFAULT_TARGET_FLAGS \
- (TARGET_DEFAULT \
- | TARGET_64BIT_DEFAULT \
- | TARGET_SUBTARGET_DEFAULT \
- | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
-
-#undef TARGET_HANDLE_OPTION
-#define TARGET_HANDLE_OPTION ix86_handle_option
-
-#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS ix86_rtx_costs
-#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST ix86_address_cost
-
-#undef TARGET_FIXED_CONDITION_CODE_REGS
-#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
-#undef TARGET_CC_MODES_COMPATIBLE
-#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
-
-#undef TARGET_MACHINE_DEPENDENT_REORG
-#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
-
-#undef TARGET_BUILD_BUILTIN_VA_LIST
-#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
-
-#undef TARGET_MD_ASM_CLOBBERS
-#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
-
-#undef TARGET_PROMOTE_PROTOTYPES
-#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
-#undef TARGET_STRUCT_VALUE_RTX
-#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
-#undef TARGET_SETUP_INCOMING_VARARGS
-#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
-#undef TARGET_MUST_PASS_IN_STACK
-#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
-#undef TARGET_PASS_BY_REFERENCE
-#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
-#undef TARGET_INTERNAL_ARG_POINTER
-#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
-#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
-#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
-
-#undef TARGET_GIMPLIFY_VA_ARG_EXPR
-#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
-
-#undef TARGET_SCALAR_MODE_SUPPORTED_P
-#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
-
-#undef TARGET_VECTOR_MODE_SUPPORTED_P
-#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
-
-#ifdef HAVE_AS_TLS
-#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
-#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
-#endif
-
-#ifdef SUBTARGET_INSERT_ATTRIBUTES
-#undef TARGET_INSERT_ATTRIBUTES
-#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
-#endif
-
-/* APPLE LOCAL begin mangle_type 7105099 */
-#undef TARGET_MANGLE_TYPE
-#define TARGET_MANGLE_TYPE ix86_mangle_type
-/* APPLE LOCAL end mangle_type 7105099 */
-
-#undef TARGET_STACK_PROTECT_FAIL
-#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
-
-#undef TARGET_FUNCTION_VALUE
-#define TARGET_FUNCTION_VALUE ix86_function_value
-
-struct gcc_target targetm = TARGET_INITIALIZER;
-
-
-/* The svr4 ABI for the i386 says that records and unions are returned
- in memory. */
-#ifndef DEFAULT_PCC_STRUCT_RETURN
-#define DEFAULT_PCC_STRUCT_RETURN 1
-#endif
-
-/* Implement TARGET_HANDLE_OPTION. */
-
-static bool
-ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
-{
- switch (code)
- {
- case OPT_m3dnow:
- if (!value)
- {
- target_flags &= ~MASK_3DNOW_A;
- target_flags_explicit |= MASK_3DNOW_A;
- }
- return true;
-
- case OPT_mmmx:
- if (!value)
- {
- target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
- target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
- }
- return true;
-
- case OPT_msse:
- if (!value)
- {
- target_flags &= ~(MASK_SSE2 | MASK_SSE3);
- target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
- }
- return true;
-
- case OPT_msse2:
- if (!value)
- {
- target_flags &= ~MASK_SSE3;
- target_flags_explicit |= MASK_SSE3;
- }
- return true;
-
- default:
- return true;
- }
-}
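-
-/* For example, -mno-sse arrives here as OPT_msse with VALUE == 0; besides
-   clearing MASK_SSE itself (done by the generic option machinery), the
-   handler above also clears MASK_SSE2 and MASK_SSE3 and records them in
-   target_flags_explicit, so later defaulting in override_options cannot
-   silently re-enable them.  */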
-
-/* APPLE LOCAL begin 4760857 optimization pragmas. */
-/* Hoisted so it can be used by reset_optimization_options. */
-static struct ptt
- {
- const struct processor_costs *cost; /* Processor costs */
- const int target_enable; /* Target flags to enable. */
- const int target_disable; /* Target flags to disable. */
- const int align_loop; /* Default alignments. */
- const int align_loop_max_skip;
- const int align_jump;
- const int align_jump_max_skip;
- const int align_func;
- }
-const processor_target_table[PROCESSOR_max] =
- {
- {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
- {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
- {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
- {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
- {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
- {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
- {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
- {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
- {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
- /* APPLE LOCAL mainline */
- {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
- {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
- {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
- };
-/* APPLE LOCAL end 4760857 optimization pragmas. */
-
-/* Sometimes certain combinations of command options do not make
- sense on a particular target machine. You can define a macro
- `OVERRIDE_OPTIONS' to take account of this. This macro, if
- defined, is executed once just after all the command options have
- been parsed.
-
- Don't use this macro to turn on various extra optimizations for
- `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
-
-void
-override_options (void)
-{
- int i;
- int ix86_tune_defaulted = 0;
- /* APPLE LOCAL mainline */
- int ix86_arch_specified = 0;
-
- /* Comes from final.c -- no real reason to change it. */
-#define MAX_CODE_ALIGN 16
-
- /* APPLE LOCAL begin 4760857 optimization pragmas. */
- /* processor_target_table moved to file scope. */
- /* APPLE LOCAL end 4760857 optimization pragmas. */
-
- static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
- static struct pta
- {
- const char *const name; /* processor name or nickname. */
- const enum processor_type processor;
- const enum pta_flags
- {
- PTA_SSE = 1,
- PTA_SSE2 = 2,
- PTA_SSE3 = 4,
- PTA_MMX = 8,
- PTA_PREFETCH_SSE = 16,
- PTA_3DNOW = 32,
- PTA_3DNOW_A = 64,
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* APPLE LOCAL begin mainline */
- PTA_64BIT = 128,
- PTA_SSSE3 = 256,
- /* APPLE LOCAL end mainline */
- PTA_CX16 = 1 << 9,
- PTA_POPCNT = 1 << 10,
- PTA_ABM = 1 << 11,
- PTA_SSE4A = 1 << 12,
- PTA_NO_SAHF = 1 << 13,
- PTA_SSE4_1 = 1 << 14,
- PTA_SSE4_2 = 1 << 15
- /* APPLE LOCAL end 5612787 mainline sse4 */
- } flags;
- }
- const processor_alias_table[] =
- {
- {"i386", PROCESSOR_I386, 0},
- {"i486", PROCESSOR_I486, 0},
- {"i586", PROCESSOR_PENTIUM, 0},
- {"pentium", PROCESSOR_PENTIUM, 0},
- {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
- {"winchip-c6", PROCESSOR_I486, PTA_MMX},
- {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
- {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
- {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
- {"i686", PROCESSOR_PENTIUMPRO, 0},
- {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
- {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
- {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
- {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
- {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
- {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
- | PTA_MMX | PTA_PREFETCH_SSE},
- {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
- | PTA_MMX | PTA_PREFETCH_SSE},
- {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_MMX | PTA_PREFETCH_SSE},
- {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
- | PTA_MMX | PTA_PREFETCH_SSE},
- /* APPLE LOCAL begin mainline */
- {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_64BIT | PTA_MMX
- | PTA_PREFETCH_SSE},
- /* APPLE LOCAL end mainline */
- {"k6", PROCESSOR_K6, PTA_MMX},
- {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
- {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
- {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
- | PTA_3DNOW_A},
- {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
- | PTA_3DNOW | PTA_3DNOW_A},
- {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
- | PTA_3DNOW_A | PTA_SSE},
- {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
- | PTA_3DNOW_A | PTA_SSE},
- {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
- | PTA_3DNOW_A | PTA_SSE},
- {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
- | PTA_SSE | PTA_SSE2 },
- {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
- | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
- {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
- | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
- {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
- | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
- {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
- | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
- {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
- {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
- };
-
- int const pta_size = ARRAY_SIZE (processor_alias_table);
-
-#ifdef SUBTARGET_OVERRIDE_OPTIONS
- SUBTARGET_OVERRIDE_OPTIONS;
-#endif
-
-#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
- SUBSUBTARGET_OVERRIDE_OPTIONS;
-#endif
-
- /* -fPIC is the default for x86_64. */
- if (TARGET_MACHO && TARGET_64BIT)
- flag_pic = 2;
-
- /* Set the default values for switches whose default depends on TARGET_64BIT
- in case they weren't overwritten by command line options. */
- if (TARGET_64BIT)
- {
- /* Mach-O doesn't support omitting the frame pointer for now. */
- if (flag_omit_frame_pointer == 2)
- flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
- if (flag_asynchronous_unwind_tables == 2)
- flag_asynchronous_unwind_tables = 1;
- if (flag_pcc_struct_return == 2)
- flag_pcc_struct_return = 0;
- }
- else
- {
- if (flag_omit_frame_pointer == 2)
- flag_omit_frame_pointer = 0;
- if (flag_asynchronous_unwind_tables == 2)
- flag_asynchronous_unwind_tables = 0;
- if (flag_pcc_struct_return == 2)
- flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
- }
-
- /* Need to check -mtune=generic first. */
- if (ix86_tune_string)
- {
- if (!strcmp (ix86_tune_string, "generic")
- || !strcmp (ix86_tune_string, "i686")
- /* As special support for cross compilers we read -mtune=native
- as -mtune=generic. With native compilers we won't see the
- -mtune=native, as it was changed by the driver. */
- || !strcmp (ix86_tune_string, "native"))
- {
- if (TARGET_64BIT)
- ix86_tune_string = "generic64";
- else
- ix86_tune_string = "generic32";
- }
- else if (!strncmp (ix86_tune_string, "generic", 7))
- error ("bad value (%s) for -mtune= switch", ix86_tune_string);
- }
- else
- {
- if (ix86_arch_string)
- ix86_tune_string = ix86_arch_string;
- if (!ix86_tune_string)
- {
- ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
- ix86_tune_defaulted = 1;
- }
-
- /* ix86_tune_string is set to ix86_arch_string or defaulted. We
- need to use a sensible tune option. */
- if (!strcmp (ix86_tune_string, "generic")
- || !strcmp (ix86_tune_string, "x86-64")
- || !strcmp (ix86_tune_string, "i686"))
- {
- if (TARGET_64BIT)
- ix86_tune_string = "generic64";
- else
- ix86_tune_string = "generic32";
- }
- }
- if (!strcmp (ix86_tune_string, "x86-64"))
- warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
- "-mtune=generic instead as appropriate.");
-
- if (!ix86_arch_string)
- ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
- /* APPLE LOCAL begin mainline */
- else
- ix86_arch_specified = 1;
- /* APPLE LOCAL end mainline */
- if (!strcmp (ix86_arch_string, "generic"))
- error ("generic CPU can be used only for -mtune= switch");
- if (!strncmp (ix86_arch_string, "generic", 7))
- error ("bad value (%s) for -march= switch", ix86_arch_string);
-
- if (ix86_cmodel_string != 0)
- {
- if (!strcmp (ix86_cmodel_string, "small"))
- ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
- else if (!strcmp (ix86_cmodel_string, "medium"))
- ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
- else if (flag_pic)
- sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
- else if (!strcmp (ix86_cmodel_string, "32"))
- ix86_cmodel = CM_32;
- else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
- ix86_cmodel = CM_KERNEL;
- else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
- ix86_cmodel = CM_LARGE;
- else
- error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
- }
- else
- {
- ix86_cmodel = CM_32;
- if (TARGET_64BIT)
- ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
- }
- if (ix86_asm_string != 0)
- {
- if (! TARGET_MACHO
- && !strcmp (ix86_asm_string, "intel"))
- ix86_asm_dialect = ASM_INTEL;
- else if (!strcmp (ix86_asm_string, "att"))
- ix86_asm_dialect = ASM_ATT;
- else
- error ("bad value (%s) for -masm= switch", ix86_asm_string);
- }
- if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
- error ("code model %qs not supported in the %s bit mode",
- ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
- if (ix86_cmodel == CM_LARGE)
- sorry ("code model %<large%> not supported yet");
- if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
- sorry ("%i-bit mode not compiled in",
- (target_flags & MASK_64BIT) ? 64 : 32);
-
- for (i = 0; i < pta_size; i++)
- if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
- {
- ix86_arch = processor_alias_table[i].processor;
- /* Default cpu tuning to the architecture. */
- ix86_tune = ix86_arch;
- if (processor_alias_table[i].flags & PTA_MMX
- && !(target_flags_explicit & MASK_MMX))
- target_flags |= MASK_MMX;
- if (processor_alias_table[i].flags & PTA_3DNOW
- && !(target_flags_explicit & MASK_3DNOW))
- target_flags |= MASK_3DNOW;
- if (processor_alias_table[i].flags & PTA_3DNOW_A
- && !(target_flags_explicit & MASK_3DNOW_A))
- target_flags |= MASK_3DNOW_A;
- if (processor_alias_table[i].flags & PTA_SSE
- && !(target_flags_explicit & MASK_SSE))
- target_flags |= MASK_SSE;
- if (processor_alias_table[i].flags & PTA_SSE2
- && !(target_flags_explicit & MASK_SSE2))
- target_flags |= MASK_SSE2;
- if (processor_alias_table[i].flags & PTA_SSE3
- && !(target_flags_explicit & MASK_SSE3))
- target_flags |= MASK_SSE3;
- /* APPLE LOCAL begin mainline */
- if (processor_alias_table[i].flags & PTA_SSSE3
- && !(target_flags_explicit & MASK_SSSE3))
- target_flags |= MASK_SSSE3;
- /* APPLE LOCAL end mainline */
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- if (processor_alias_table[i].flags & PTA_SSE4_1
- && !(target_flags_explicit & MASK_SSE4_1))
- target_flags |= MASK_SSE4_1;
- if (processor_alias_table[i].flags & PTA_SSE4_2
- && !(target_flags_explicit & MASK_SSE4_2))
- target_flags |= MASK_SSE4_2;
- if (processor_alias_table[i].flags & PTA_SSE4A
- && !(target_flags_explicit & MASK_SSE4A))
- target_flags |= MASK_SSE4A;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
- x86_prefetch_sse = true;
- if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
- error ("CPU you selected does not support x86-64 "
- "instruction set");
- break;
- }
-
- if (i == pta_size)
- error ("bad value (%s) for -march= switch", ix86_arch_string);
-
- for (i = 0; i < pta_size; i++)
- if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
- {
- ix86_tune = processor_alias_table[i].processor;
- if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
- {
- if (ix86_tune_defaulted)
- {
- ix86_tune_string = "x86-64";
- for (i = 0; i < pta_size; i++)
- if (! strcmp (ix86_tune_string,
- processor_alias_table[i].name))
- break;
- ix86_tune = processor_alias_table[i].processor;
- }
- else
- error ("CPU you selected does not support x86-64 "
- "instruction set");
- }
-	/* Intel CPUs have always interpreted SSE prefetch instructions as
-	   NOPs, so we can enable SSE prefetch instructions even when
-	   -mtune (rather than -march) points us to a processor that has them.
-	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
-	   higher processors.  */
- if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
- x86_prefetch_sse = true;
- break;
- }
- if (i == pta_size)
- error ("bad value (%s) for -mtune= switch", ix86_tune_string);
-
- if (optimize_size)
- ix86_cost = &size_cost;
- else
- ix86_cost = processor_target_table[ix86_tune].cost;
- target_flags |= processor_target_table[ix86_tune].target_enable;
- target_flags &= ~processor_target_table[ix86_tune].target_disable;
-
- /* Arrange to set up i386_stack_locals for all functions. */
- init_machine_status = ix86_init_machine_status;
-
- /* Validate -mregparm= value. */
- if (ix86_regparm_string)
- {
- i = atoi (ix86_regparm_string);
- if (i < 0 || i > REGPARM_MAX)
- error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
- else
- ix86_regparm = i;
- }
- else
- if (TARGET_64BIT)
- ix86_regparm = REGPARM_MAX;
-
- /* If the user has provided any of the -malign-* options,
- warn and use that value only if -falign-* is not set.
- Remove this code in GCC 3.2 or later. */
- if (ix86_align_loops_string)
- {
- warning (0, "-malign-loops is obsolete, use -falign-loops");
- if (align_loops == 0)
- {
- i = atoi (ix86_align_loops_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
- else
- align_loops = 1 << i;
- }
- }
-
- if (ix86_align_jumps_string)
- {
- warning (0, "-malign-jumps is obsolete, use -falign-jumps");
- if (align_jumps == 0)
- {
- i = atoi (ix86_align_jumps_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
-	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
- else
- align_jumps = 1 << i;
- }
- }
-
- if (ix86_align_funcs_string)
- {
- warning (0, "-malign-functions is obsolete, use -falign-functions");
- if (align_functions == 0)
- {
- i = atoi (ix86_align_funcs_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
-	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
- else
- align_functions = 1 << i;
- }
- }
-
- /* Default align_* from the processor table. */
- if (align_loops == 0)
- {
- align_loops = processor_target_table[ix86_tune].align_loop;
- align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
- }
- if (align_jumps == 0)
- {
- align_jumps = processor_target_table[ix86_tune].align_jump;
- align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
- }
- if (align_functions == 0)
- {
- align_functions = processor_target_table[ix86_tune].align_func;
- }
-
- /* Validate -mbranch-cost= value, or provide default. */
- ix86_branch_cost = ix86_cost->branch_cost;
- if (ix86_branch_cost_string)
- {
- i = atoi (ix86_branch_cost_string);
- if (i < 0 || i > 5)
- error ("-mbranch-cost=%d is not between 0 and 5", i);
- else
- ix86_branch_cost = i;
- }
- if (ix86_section_threshold_string)
- {
- i = atoi (ix86_section_threshold_string);
- if (i < 0)
- error ("-mlarge-data-threshold=%d is negative", i);
- else
- ix86_section_threshold = i;
- }
-
- if (ix86_tls_dialect_string)
- {
- if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
- ix86_tls_dialect = TLS_DIALECT_GNU;
- else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
- ix86_tls_dialect = TLS_DIALECT_GNU2;
- else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
- ix86_tls_dialect = TLS_DIALECT_SUN;
- else
- error ("bad value (%s) for -mtls-dialect= switch",
- ix86_tls_dialect_string);
- }
- /* APPLE LOCAL begin mainline */
- if (TARGET_64BIT)
- {
- if (TARGET_ALIGN_DOUBLE)
- error ("-malign-double makes no sense in the 64bit mode");
- if (TARGET_RTD)
- error ("-mrtd calling convention not supported in the 64bit mode");
- /* APPLE LOCAL begin radar 4877693 */
- if (ix86_force_align_arg_pointer)
- error ("-mstackrealign not supported in the 64bit mode");
- /* APPLE LOCAL end radar 4877693 */
-
- /* Enable by default the SSE and MMX builtins. Do allow the user to
- explicitly disable any of these. In particular, disabling SSE and
- MMX for kernel code is extremely useful. */
- if (!ix86_arch_specified)
- target_flags
- |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE
- | TARGET_SUBTARGET64_DEFAULT) & ~target_flags_explicit);
- /* APPLE LOCAL begin mainline candidate */
- /* Disable the red zone for kernel compilation.
- ??? Why aren't we using -mcmodel=kernel? */
- if (TARGET_MACHO
- && (flag_mkernel || flag_apple_kext))
- target_flags |= MASK_NO_RED_ZONE;
- /* APPLE LOCAL end mainline candidate */
- }
- else
- {
- if (!ix86_arch_specified)
- target_flags |= (TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit);
-
-      /* The i386 ABI does not specify a red zone.  It still makes sense to
-         use one when the programmer takes care to keep the stack from being
-         destroyed.  */
- if (!(target_flags_explicit & MASK_NO_RED_ZONE))
- target_flags |= MASK_NO_RED_ZONE;
- }
-
- /* APPLE LOCAL end mainline */
- /* Keep nonleaf frame pointers. */
- if (flag_omit_frame_pointer)
- target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
- else if (TARGET_OMIT_LEAF_FRAME_POINTER)
- flag_omit_frame_pointer = 1;
-
- /* If we're doing fast math, we don't care about comparison order
- wrt NaNs. This lets us use a shorter comparison sequence. */
- if (flag_finite_math_only)
- target_flags &= ~MASK_IEEE_FP;
-
- /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
- since the insns won't need emulation. */
- if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
- target_flags &= ~MASK_NO_FANCY_MATH_387;
-
- /* Likewise, if the target doesn't have a 387, or we've specified
- software floating point, don't use 387 inline intrinsics. */
- if (!TARGET_80387)
- target_flags |= MASK_NO_FANCY_MATH_387;
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* Turn on SSE4.1 builtins for -msse4.2. */
- if (TARGET_SSE4_2)
- target_flags |= MASK_SSE4_1;
- /* Turn on SSSE3 builtins for -msse4.1. */
- if (TARGET_SSE4_1)
- target_flags |= MASK_SSSE3;
- /* Turn on SSE3 builtins for -msse4a. */
- if (TARGET_SSE4A)
- target_flags |= MASK_SSE3;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- /* APPLE LOCAL begin mainline */
- /* Turn on SSE3 builtins for -mssse3. */
- if (TARGET_SSSE3)
- target_flags |= MASK_SSE3;
- /* APPLE LOCAL end mainline */
- /* Turn on SSE2 builtins for -msse3. */
- if (TARGET_SSE3)
- target_flags |= MASK_SSE2;
-
- /* Turn on SSE builtins for -msse2. */
- if (TARGET_SSE2)
- target_flags |= MASK_SSE;
-
- /* Turn on MMX builtins for -msse. */
- if (TARGET_SSE)
- {
- target_flags |= MASK_MMX & ~target_flags_explicit;
- x86_prefetch_sse = true;
- }
-
- /* Turn on MMX builtins for 3Dnow. */
- if (TARGET_3DNOW)
- target_flags |= MASK_MMX;
-
- /* APPLE LOCAL mainline */
- /* Moved this up... */
- /* Validate -mpreferred-stack-boundary= value, or provide default.
- The default of 128 bits is for Pentium III's SSE __m128. We can't
- change it because of optimize_size. Otherwise, we can't mix object
- files compiled with -Os and -On. */
- ix86_preferred_stack_boundary = 128;
- if (ix86_preferred_stack_boundary_string)
- {
- i = atoi (ix86_preferred_stack_boundary_string);
- if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
- error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
- TARGET_64BIT ? 4 : 2);
- else
- ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
- }
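-
-  /* For example, -mpreferred-stack-boundary=4 selects
-     (1 << 4) * BITS_PER_UNIT == 128 bits, i.e. the default 16-byte
-     stack alignment.  */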
-
- /* Accept -msseregparm only if at least SSE support is enabled. */
- if (TARGET_SSEREGPARM
- && ! TARGET_SSE)
- error ("-msseregparm used without SSE enabled");
-
- ix86_fpmath = TARGET_FPMATH_DEFAULT;
-
- if (ix86_fpmath_string != 0)
- {
- if (! strcmp (ix86_fpmath_string, "387"))
- ix86_fpmath = FPMATH_387;
- else if (! strcmp (ix86_fpmath_string, "sse"))
- {
- if (!TARGET_SSE)
- {
- warning (0, "SSE instruction set disabled, using 387 arithmetics");
- ix86_fpmath = FPMATH_387;
- }
- else
- ix86_fpmath = FPMATH_SSE;
- }
- else if (! strcmp (ix86_fpmath_string, "387,sse")
- || ! strcmp (ix86_fpmath_string, "sse,387"))
- {
- if (!TARGET_SSE)
- {
- warning (0, "SSE instruction set disabled, using 387 arithmetics");
- ix86_fpmath = FPMATH_387;
- }
- else if (!TARGET_80387)
- {
- warning (0, "387 instruction set disabled, using SSE arithmetics");
- ix86_fpmath = FPMATH_SSE;
- }
- else
- ix86_fpmath = FPMATH_SSE | FPMATH_387;
- }
- else
- error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
- }
-
- /* If the i387 is disabled, then do not return values in it. */
- if (!TARGET_80387)
- target_flags &= ~MASK_FLOAT_RETURNS;
-
- if ((x86_accumulate_outgoing_args & TUNEMASK)
- && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
- && !optimize_size)
- target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
-
- /* ??? Unwind info is not correct around the CFG unless either a frame
- pointer is present or M_A_O_A is set. Fixing this requires rewriting
- unwind info generation to be aware of the CFG and propagating states
- around edges. */
- if ((flag_unwind_tables || flag_asynchronous_unwind_tables
- || flag_exceptions || flag_non_call_exceptions)
- && flag_omit_frame_pointer
- && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
- {
- if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
- warning (0, "unwind tables currently require either a frame pointer "
- "or -maccumulate-outgoing-args for correctness");
- target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
- }
-
- /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
- {
- char *p;
- ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
- p = strchr (internal_label_prefix, 'X');
- internal_label_prefix_len = p - internal_label_prefix;
- *p = '\0';
- }
-
- /* When scheduling description is not available, disable scheduler pass
- so it won't slow down the compilation and make x87 code slower. */
- /* APPLE LOCAL 5591571 */
- if (1 || !TARGET_SCHEDULE)
- flag_schedule_insns_after_reload = flag_schedule_insns = 0;
-
- /* APPLE LOCAL begin dynamic-no-pic */
-#if TARGET_MACHO
- if (MACHO_DYNAMIC_NO_PIC_P)
- {
- if (flag_pic)
- warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC");
- flag_pic = 0;
- }
- else
-#endif
- if (flag_pic == 1)
- {
- /* Darwin's -fpic is -fPIC. */
- flag_pic = 2;
- }
- /* APPLE LOCAL end dynamic-no-pic */
- /* APPLE LOCAL begin 4812082 -fast */
- /* These flags were the best on the software H264 codec, and have therefore
- been lumped into -fast per 4812082. They have not been evaluated on
- any other code, except that -fno-tree-pre is known to lose on the
- hardware accelerated version of the codec. */
- if (flag_fast || flag_fastf || flag_fastcp)
- {
- flag_omit_frame_pointer = 1;
- flag_strict_aliasing = 1;
- flag_tree_pre = 0;
- target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
- align_loops = processor_target_table[ix86_tune].align_loop;
- }
- /* APPLE LOCAL end 4812082 -fast */
-}
-
-/* Switch to the appropriate section for output of DECL.
- DECL is either a `VAR_DECL' node or a constant of some sort.
- RELOC indicates whether forming the initial value of DECL requires
- link-time relocations. */
-
-static section *
-x86_64_elf_select_section (tree decl, int reloc,
- unsigned HOST_WIDE_INT align)
-{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && ix86_in_large_data_p (decl))
- {
- const char *sname = NULL;
- unsigned int flags = SECTION_WRITE;
- switch (categorize_decl_for_section (decl, reloc))
- {
- case SECCAT_DATA:
- sname = ".ldata";
- break;
- case SECCAT_DATA_REL:
- sname = ".ldata.rel";
- break;
- case SECCAT_DATA_REL_LOCAL:
- sname = ".ldata.rel.local";
- break;
- case SECCAT_DATA_REL_RO:
- sname = ".ldata.rel.ro";
- break;
- case SECCAT_DATA_REL_RO_LOCAL:
- sname = ".ldata.rel.ro.local";
- break;
- case SECCAT_BSS:
- sname = ".lbss";
- flags |= SECTION_BSS;
- break;
- case SECCAT_RODATA:
- case SECCAT_RODATA_MERGE_STR:
- case SECCAT_RODATA_MERGE_STR_INIT:
- case SECCAT_RODATA_MERGE_CONST:
- sname = ".lrodata";
- flags = 0;
- break;
- case SECCAT_SRODATA:
- case SECCAT_SDATA:
- case SECCAT_SBSS:
- gcc_unreachable ();
- case SECCAT_TEXT:
- case SECCAT_TDATA:
- case SECCAT_TBSS:
-	  /* We don't split these for the medium model.  Place them into
-	     default sections and hope for the best.  */
- break;
- }
- if (sname)
- {
- /* We might get called with string constants, but get_named_section
- doesn't like them as they are not DECLs. Also, we need to set
- flags in that case. */
- if (!DECL_P (decl))
- return get_section (sname, flags, NULL);
- return get_named_section (decl, sname, reloc);
- }
- }
- return default_elf_select_section (decl, reloc, align);
-}
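-
-/* For example, with -mcmodel=medium a writable initialized global that
-   ix86_in_large_data_p accepts is categorized as SECCAT_DATA above and
-   placed in .ldata; everything else falls through to
-   default_elf_select_section.  */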
-
-/* Build up a unique section name, expressed as a
- STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
- RELOC indicates whether the initial value of EXP requires
- link-time relocations. */
-
-static void
-x86_64_elf_unique_section (tree decl, int reloc)
-{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && ix86_in_large_data_p (decl))
- {
- const char *prefix = NULL;
- /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
- bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
-
- switch (categorize_decl_for_section (decl, reloc))
- {
- case SECCAT_DATA:
- case SECCAT_DATA_REL:
- case SECCAT_DATA_REL_LOCAL:
- case SECCAT_DATA_REL_RO:
- case SECCAT_DATA_REL_RO_LOCAL:
- prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
- break;
- case SECCAT_BSS:
- prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
- break;
- case SECCAT_RODATA:
- case SECCAT_RODATA_MERGE_STR:
- case SECCAT_RODATA_MERGE_STR_INIT:
- case SECCAT_RODATA_MERGE_CONST:
- prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
- break;
- case SECCAT_SRODATA:
- case SECCAT_SDATA:
- case SECCAT_SBSS:
- gcc_unreachable ();
- case SECCAT_TEXT:
- case SECCAT_TDATA:
- case SECCAT_TBSS:
-	  /* We don't split these for the medium model.  Place them into
-	     default sections and hope for the best.  */
- break;
- }
- if (prefix)
- {
- const char *name;
- size_t nlen, plen;
- char *string;
- plen = strlen (prefix);
-
- name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
- name = targetm.strip_name_encoding (name);
- nlen = strlen (name);
-
- string = alloca (nlen + plen + 1);
- memcpy (string, prefix, plen);
- memcpy (string + plen, name, nlen + 1);
-
- DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
- return;
- }
- }
- default_unique_section (decl, reloc);
-}
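-
-/* For example, a large-data BSS object "buf" (a hypothetical name) that is
-   not one-only gets the section name ".lbss.buf" from the code above; with
-   DECL_ONE_ONLY and no COMDAT group support it would be named
-   ".gnu.linkonce.lb.buf" instead.  */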
-
-#ifdef COMMON_ASM_OP
-/* This says how to output assembler code to declare an
- uninitialized external linkage data object.
-
-   For medium model x86-64 we need to use the .largecomm directive
-   for large objects.  */
-void
-x86_elf_aligned_common (FILE *file,
- const char *name, unsigned HOST_WIDE_INT size,
- int align)
-{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && size > (unsigned int)ix86_section_threshold)
- fprintf (file, ".largecomm\t");
- else
- fprintf (file, "%s", COMMON_ASM_OP);
- assemble_name (file, name);
- fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
- size, align / BITS_PER_UNIT);
-}
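-
-/* For example, a common object whose size exceeds ix86_section_threshold
-   in the medium model is emitted as
-       .largecomm	NAME,SIZE,ALIGN
-   while smaller objects use COMMON_ASM_OP (typically ".comm").  */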
-
-/* Utility function for targets to use in implementing
- ASM_OUTPUT_ALIGNED_BSS. */
-
-void
-x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
- const char *name, unsigned HOST_WIDE_INT size,
- int align)
-{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && size > (unsigned int)ix86_section_threshold)
- switch_to_section (get_named_section (decl, ".lbss", 0));
- else
- switch_to_section (bss_section);
- ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
-#ifdef ASM_DECLARE_OBJECT_NAME
- last_assemble_variable_decl = decl;
- ASM_DECLARE_OBJECT_NAME (file, name, decl);
-#else
-  /* The standard thing is just to output a label for the object.  */
- ASM_OUTPUT_LABEL (file, name);
-#endif /* ASM_DECLARE_OBJECT_NAME */
- ASM_OUTPUT_SKIP (file, size ? size : 1);
-}
-#endif
-
-void
-optimization_options (int level, int size ATTRIBUTE_UNUSED)
-{
- /* APPLE LOCAL begin disable strict aliasing; breaks too much existing code. */
-#if TARGET_MACHO
- flag_strict_aliasing = 0;
-#endif
- /* APPLE LOCAL end disable strict aliasing; breaks too much existing code. */
- /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
- make the problem with not enough registers even worse. */
-#ifdef INSN_SCHEDULING
- if (level > 1)
- flag_schedule_insns = 0;
-#endif
-
- /* APPLE LOCAL begin pragma fenv */
- /* Trapping math is not needed by many users, and is expensive.
- C99 permits us to default it off and we do that. It is
- turned on when <fenv.h> is included (see darwin_pragma_fenv
- in darwin-c.c). */
- flag_trapping_math = 0;
- /* APPLE LOCAL end pragma fenv */
-
- if (TARGET_MACHO)
- /* The Darwin libraries never set errno, so we might as well
- avoid calling them when that's the only reason we would. */
- flag_errno_math = 0;
-
-  /* The default values of these switches depend on TARGET_64BIT,
-     which is not known at this moment.  Mark these values with 2 and
-     let the user override them.  If there is no command line option
-     specifying them, we will set the defaults in override_options.  */
- if (optimize >= 1)
- flag_omit_frame_pointer = 2;
- flag_pcc_struct_return = 2;
- flag_asynchronous_unwind_tables = 2;
-#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
- SUBTARGET_OPTIMIZATION_OPTIONS;
-#endif
- /* APPLE LOCAL begin 4200243 */
- if (getenv ("RC_FORCE_SSE3"))
- target_flags |= MASK_SSE3;
-}
-/* APPLE LOCAL end 4200243 */
-
-/* APPLE LOCAL begin optimization pragmas 3124235/3420242 */
-/* Version of the above for use from #pragma optimization_level. Only
- per-function flags are reset. */
-#if TARGET_MACHO
-void
-reset_optimization_options (int level, int size)
-{
- /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
- make the problem with not enough registers even worse. */
-#ifdef INSN_SCHEDULING
- if (level > 1)
- flag_schedule_insns = 0;
-#endif
-
- /* APPLE LOCAL begin pragma fenv */
- /* Trapping math is not needed by many users, and is expensive.
- C99 permits us to default it off and we do that. It is
- turned on when <fenv.h> is included (see darwin_pragma_fenv
- in darwin-c.c). */
- flag_trapping_math = 0;
- /* APPLE LOCAL end pragma fenv */
-
- /* The default values of these switches depend on TARGET_64BIT
- which was set earlier and not reset. */
- if (optimize >= 1)
- {
- if (TARGET_64BIT)
- flag_omit_frame_pointer = 1;
- else
- flag_omit_frame_pointer = 0;
- }
-#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
- SUBTARGET_OPTIMIZATION_OPTIONS;
-#endif
- /* APPLE LOCAL begin 4760857 */
- if (size)
- ix86_cost = &size_cost;
- else
- ix86_cost = processor_target_table[ix86_tune].cost;
-
- /* Default align_* from the processor table. */
- if (align_loops == 0)
- {
- align_loops = processor_target_table[ix86_tune].align_loop;
- align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
- }
- if (align_jumps == 0)
- {
- align_jumps = processor_target_table[ix86_tune].align_jump;
- align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
- }
- /* APPLE LOCAL end 4760857 */
-}
-#endif
-/* APPLE LOCAL end optimization pragmas 3124235/3420242 */
-
-/* Table of valid machine attributes. */
-const struct attribute_spec ix86_attribute_table[] =
-{
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
- /* Stdcall attribute says callee is responsible for popping arguments
- if they are not variable. */
- { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- /* Fastcall attribute says callee is responsible for popping arguments
- if they are not variable. */
- { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- /* Cdecl attribute says the callee is a normal C declaration */
- { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- /* Regparm attribute specifies how many integer arguments are to be
- passed in registers. */
- { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
- /* APPLE LOCAL begin regparmandstackparm */
- /* regparmandstackparm means two entry points; a traditional stack-based
- one, and another, with a mangled name, that employs regparm and
- sseregparm. */
- { "regparmandstackparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- { "regparmandstackparmee", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- /* APPLE LOCAL end regparmandstackparm */
- /* Sseregparm attribute says we are using x86_64 calling conventions
- for FP arguments. */
- { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
- /* force_align_arg_pointer says this function realigns the stack at entry. */
- { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
- false, true, true, ix86_handle_cconv_attribute },
-#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
- { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
- { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
- { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
-#endif
- { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
- { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
-#ifdef SUBTARGET_ATTRIBUTE_TABLE
- SUBTARGET_ATTRIBUTE_TABLE,
-#endif
- { NULL, 0, 0, false, false, false, NULL }
-};
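-
-/* For example, the table above routes declarations such as
-     int __attribute__ ((regparm (2))) f (int a, int b);
-   through ix86_handle_cconv_attribute below, which validates the
-   argument count and attribute combinations.  */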
-
-/* Decide whether we can make a sibling call to a function. DECL is the
- declaration of the function being targeted by the call and EXP is the
- CALL_EXPR representing the call. */
-
-static bool
-ix86_function_ok_for_sibcall (tree decl, tree exp)
-{
- tree func;
- rtx a, b;
-
- /* APPLE LOCAL begin indirect sibcall 4087330 */
- /* If we are generating position-independent code, we cannot sibcall
- optimize any indirect call, or a direct call to a global function,
- as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
- if (!TARGET_MACHO
- && !TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
- return false;
- /* APPLE LOCAL end indirect sibcall 4087330 */
-
- if (decl)
- func = decl;
- else
- {
- func = TREE_TYPE (TREE_OPERAND (exp, 0));
- if (POINTER_TYPE_P (func))
- func = TREE_TYPE (func);
- }
-
-  /* Check that the return value locations are the same.  For example,
-     if we are returning floats on the 80387 register stack, we cannot
- make a sibcall from a function that doesn't return a float to a
- function that does or, conversely, from a function that does return
- a float to a function that doesn't; the necessary stack adjustment
- would not be executed. This is also the place we notice
- differences in the return value ABI. Note that it is ok for one
- of the functions to have void return type as long as the return
- value of the other is passed in a register. */
- a = ix86_function_value (TREE_TYPE (exp), func, false);
- b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
- cfun->decl, false);
- if (STACK_REG_P (a) || STACK_REG_P (b))
- {
- if (!rtx_equal_p (a, b))
- return false;
- }
- else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
- ;
- else if (!rtx_equal_p (a, b))
- return false;
-
- /* If this call is indirect, we'll need to be able to use a call-clobbered
- register for the address of the target function. Make sure that all
- such registers are not used for passing parameters. */
- if (!decl && !TARGET_64BIT)
- {
- tree type;
-
- /* We're looking at the CALL_EXPR, we need the type of the function. */
- type = TREE_OPERAND (exp, 0); /* pointer expression */
- type = TREE_TYPE (type); /* pointer type */
- type = TREE_TYPE (type); /* function type */
-
- if (ix86_function_regparm (type, NULL) >= 3)
- {
- /* ??? Need to count the actual number of registers to be used,
- not the possible number of registers. Fix later. */
- return false;
- }
- }
-
-#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
- /* Dllimport'd functions are also called indirectly. */
- if (decl && DECL_DLLIMPORT_P (decl)
- && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
- return false;
-#endif
-
-  /* If we force-aligned the stack, then sibcalling would unalign the
- stack, which may break the called function. */
- if (cfun->machine->force_align_arg_pointer)
- return false;
-
- /* Otherwise okay. That also includes certain types of indirect calls. */
- return true;
-}
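-
-/* For example, in 32-bit PIC code on a non-Darwin target a call through a
-   function pointer is rejected above because the PLT requires %ebx to be
-   live; on Darwin, which has no PLT, the same indirect call may still be
-   sibcall-optimized.  */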
-
-/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
- calling convention attributes;
- arguments as in struct attribute_spec.handler. */
-
-static tree
-ix86_handle_cconv_attribute (tree *node, tree name,
- tree args,
- int flags ATTRIBUTE_UNUSED,
- bool *no_add_attrs)
-{
- if (TREE_CODE (*node) != FUNCTION_TYPE
- && TREE_CODE (*node) != METHOD_TYPE
- && TREE_CODE (*node) != FIELD_DECL
- && TREE_CODE (*node) != TYPE_DECL)
- {
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- return NULL_TREE;
- }
-
- /* Can combine regparm with all attributes but fastcall. */
- if (is_attribute_p ("regparm", name))
- {
- tree cst;
-
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and regparm attributes are not compatible");
- }
-
- /* APPLE LOCAL begin regparmandstackparm */
- if (!TARGET_64BIT
- && (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node))
- || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (*node))))
- {
- error ("regparmandstackparm and regparm attributes are not compatible");
- }
- /* APPLE LOCAL end regparmandstackparm */
-
- cst = TREE_VALUE (args);
- if (TREE_CODE (cst) != INTEGER_CST)
- {
- warning (OPT_Wattributes,
- "%qs attribute requires an integer constant argument",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- }
- else if (compare_tree_int (cst, REGPARM_MAX) > 0)
- {
- warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
- IDENTIFIER_POINTER (name), REGPARM_MAX);
- *no_add_attrs = true;
- }
-
- if (!TARGET_64BIT
- && lookup_attribute (ix86_force_align_arg_pointer_string,
- TYPE_ATTRIBUTES (*node))
- && compare_tree_int (cst, REGPARM_MAX-1))
- {
- error ("%s functions limited to %d register parameters",
- ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
- }
-
- return NULL_TREE;
- }
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* Turn on popcnt instruction for -msse4.2 or -mabm. */
- if (TARGET_SSE4_2)
- x86_popcnt = true;
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- if (TARGET_64BIT)
- {
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- return NULL_TREE;
- }
-
- /* Can combine fastcall with stdcall (redundant) and sseregparm. */
- /* APPLE LOCAL begin regparmandstackparm */
- if (is_attribute_p ("fastcall", name)
- || is_attribute_p ("regparmandstackparm", name))
- /* APPLE LOCAL end regparmandstackparm */
- {
- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and cdecl attributes are not compatible");
- }
- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and stdcall attributes are not compatible");
- }
- if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and regparm attributes are not compatible");
- }
- }
-
- /* Can combine stdcall with fastcall (redundant), regparm and
- sseregparm. */
- else if (is_attribute_p ("stdcall", name))
- {
- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
- {
- error ("stdcall and cdecl attributes are not compatible");
- }
- /* APPLE LOCAL begin regparmandstackparm */
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))
- || lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (*node)))
- /* APPLE LOCAL end regparmandstackparm */
- {
- error ("stdcall and fastcall attributes are not compatible");
- }
- }
-
- /* Can combine cdecl with regparm and sseregparm. */
- else if (is_attribute_p ("cdecl", name))
- {
- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
- {
- error ("stdcall and cdecl attributes are not compatible");
- }
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
- {
- error ("fastcall and cdecl attributes are not compatible");
- }
- }
-
- /* Can combine sseregparm with all attributes. */
-
- return NULL_TREE;
-}
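-
-/* For example, declaring
-     void __attribute__ ((fastcall, regparm (3))) f (void);
-   triggers the "fastcall and regparm attributes are not compatible"
-   error above.  */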
-
-/* Return 0 if the attributes for two types are incompatible, 1 if they
- are compatible, and 2 if they are nearly compatible (which causes a
- warning to be generated). */
-
-static int
-ix86_comp_type_attributes (tree type1, tree type2)
-{
- /* Check for mismatch of non-default calling convention. */
- const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
-
- if (TREE_CODE (type1) != FUNCTION_TYPE)
- return 1;
-
- /* Check for mismatched fastcall/regparm types. */
- if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
- || (ix86_function_regparm (type1, NULL)
- != ix86_function_regparm (type2, NULL)))
- return 0;
-
- /* Check for mismatched sseregparm types. */
- if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
- return 0;
-
- /* Check for mismatched return types (cdecl vs stdcall). */
- if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
- != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
- return 0;
-
- return 1;
-}
-
-/* Return the regparm value for a function with the indicated TYPE and DECL.
- DECL may be NULL when calling function indirectly
- or considering a libcall. */
-
-static int
-ix86_function_regparm (tree type, tree decl)
-{
- tree attr;
- /* APPLE LOCAL begin MERGE FIXME audit to ensure that it's ok
-
-     We had local_regparm but the FSF didn't, and there didn't seem to
-     be a merge conflict, so something is strange.  These seem to be just
-     normal Apple local changes.  I asked Stuart about them in email.  */
- int regparm = ix86_regparm;
- bool user_convention = false;
-
- if (!TARGET_64BIT)
- {
- attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
- if (attr)
- {
- regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
- user_convention = true;
- }
-
- /* APPLE LOCAL begin regparmandstackparm */
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))
- || lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
- /* APPLE LOCAL end regparmandstackparm */
- {
- regparm = 2;
- user_convention = true;
- }
-
- /* Use register calling convention for local functions when possible. */
- if (!TARGET_64BIT && !user_convention && decl
- && flag_unit_at_a_time && !profile_flag)
- {
- struct cgraph_local_info *i = cgraph_local_info (decl);
- if (i && i->local)
- {
- int local_regparm, globals = 0, regno;
-
- /* Make sure no regparm register is taken by a global register
- variable. */
- for (local_regparm = 0; local_regparm < 3; local_regparm++)
- if (global_regs[local_regparm])
- break;
-	     /* We can't use regparm(3) for nested functions, as these use
-		the static chain pointer in the third argument.  */
- if (local_regparm == 3
- /* APPLE LOCAL begin mainline */
- && (decl_function_context (decl)
- || ix86_force_align_arg_pointer)
- /* APPLE LOCAL end mainline */
- && !DECL_NO_STATIC_CHAIN (decl))
- local_regparm = 2;
-	     /* If the function realigns its stack pointer, the
-		prologue will clobber %ecx.  If we've already
-		generated code for the callee, the callee's
-		DECL_STRUCT_FUNCTION is gone, so we fall back to
-		scanning the attributes for the self-realigning
-		property.  */
- if ((DECL_STRUCT_FUNCTION (decl)
- /* MERGE FIXME was in our version, but not in FSF 2006-05-23 */
- && DECL_STRUCT_FUNCTION (decl)->machine
- && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
- || (!DECL_STRUCT_FUNCTION (decl)
- && lookup_attribute (ix86_force_align_arg_pointer_string,
- TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
- local_regparm = 2;
-	     /* Each global register variable increases register pressure,
-		so the more global register variables there are, the less
-		useful the regparm optimization is, unless the user requests
-		it explicitly.  */
- for (regno = 0; regno < 6; regno++)
- if (global_regs[regno])
- globals++;
- local_regparm
- = globals < local_regparm ? local_regparm - globals : 0;
-
- if (local_regparm > regparm)
- regparm = local_regparm;
- }
- }
- }
- /* APPLE LOCAL end MERGE FIXME audit to ensure that it's ok */
- return regparm;
-}
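-
-/* For example, a static function whose address never escapes (so cgraph
-   marks it i->local) is promoted above to the regparm(3) convention,
-   unless a global register variable, a static chain, or stack realignment
-   claims one of the three candidate registers.  */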
-
-/* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
-   DFmode (2) arguments in SSE registers for a function with the
-   indicated TYPE and DECL.  DECL may be NULL when calling a function
-   indirectly or considering a libcall.  Otherwise return 0.  */
-
-static int
-ix86_function_sseregparm (tree type, tree decl)
-{
- /* Use SSE registers to pass SFmode and DFmode arguments if requested
- by the sseregparm attribute. */
- if (TARGET_SSEREGPARM
- || (type
- && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
- {
- if (!TARGET_SSE)
- {
- if (decl)
- error ("Calling %qD with attribute sseregparm without "
- "SSE/SSE2 enabled", decl);
- else
- error ("Calling %qT with attribute sseregparm without "
- "SSE/SSE2 enabled", type);
- return 0;
- }
-
- return 2;
- }
-
- /* APPLE LOCAL begin regparmandstackparm */
- if (type && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (type)))
- return 2;
- /* APPLE LOCAL end regparmandstackparm */
-
- /* For local functions, pass up to SSE_REGPARM_MAX SFmode
- (and DFmode for SSE2) arguments in SSE registers,
- even for 32-bit targets. */
- if (!TARGET_64BIT && decl
- && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
- {
- struct cgraph_local_info *i = cgraph_local_info (decl);
- if (i && i->local)
- return TARGET_SSE2 ? 2 : 1;
- }
-
- return 0;
-}
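-
-/* For example, with -m32 -msse2 -mfpmath=sse and -funit-at-a-time, a
-   file-local function returns 2 here, so its SFmode and DFmode arguments
-   are passed in SSE registers rather than on the stack.  */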
-
-/* Return true if EAX is live at the start of the function. Used by
- ix86_expand_prologue to determine if we need special help before
- calling allocate_stack_worker. */
-
-static bool
-ix86_eax_live_at_start_p (void)
-{
- /* Cheat. Don't bother working forward from ix86_function_regparm
- to the function type to whether an actual argument is located in
- eax. Instead just look at cfg info, which is still close enough
- to correct at this point. This gives false positives for broken
- functions that might use uninitialized data that happens to be
- allocated in eax, but who cares? */
- return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
-}
-
-/* Value is the number of bytes of arguments automatically
- popped when returning from a subroutine call.
- FUNDECL is the declaration node of the function (as a tree),
- FUNTYPE is the data type of the function (as a tree),
- or for a library call it is an identifier node for the subroutine name.
- SIZE is the number of bytes of arguments passed on the stack.
-
- On the 80386, the RTD insn may be used to pop them if the number
- of args is fixed, but if the number is variable then the caller
- must pop them all. RTD can't be used for library calls now
- because the library is compiled with the Unix compiler.
- Use of RTD is a selectable option, since it is incompatible with
- standard Unix calling sequences. If the option is not selected,
- the caller must always pop the args.
-
- The attribute stdcall is equivalent to RTD on a per module basis. */
-
-int
-ix86_return_pops_args (tree fundecl, tree funtype, int size)
-{
- int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
-
- /* Cdecl functions override -mrtd, and never pop the stack. */
- if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
-
- /* Stdcall and fastcall functions will pop the stack if not
- variable args. */
- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
- || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
- rtd = 1;
-
- if (rtd
- && (TYPE_ARG_TYPES (funtype) == NULL_TREE
- || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
- == void_type_node)))
- return size;
- }
-
- /* Lose any fake structure return argument if it is passed on the stack. */
- if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
- && !TARGET_64BIT
- && !KEEP_AGGREGATE_RETURN_POINTER)
- {
- int nregs = ix86_function_regparm (funtype, fundecl);
-
- if (!nregs)
- return GET_MODE_SIZE (Pmode);
- }
-
- return 0;
-}
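-
-/* For example, for a fixed-argument stdcall function
-     void __attribute__ ((stdcall)) f (int a, int b);
-   SIZE is 8 and the callee pops its own arguments ("ret $8"); a variadic
-   stdcall function returns 0 here and leaves the popping to the caller.  */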
-
-/* Argument support functions. */
-
-/* Return true when register may be used to pass function parameters. */
-bool
-ix86_function_arg_regno_p (int regno)
-{
- int i;
- if (!TARGET_64BIT)
- {
- if (TARGET_MACHO)
- return (regno < REGPARM_MAX
- || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
- else
- return (regno < REGPARM_MAX
- || (TARGET_MMX && MMX_REGNO_P (regno)
- && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
- || (TARGET_SSE && SSE_REGNO_P (regno)
- && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
- }
-
- if (TARGET_MACHO)
- {
- if (SSE_REGNO_P (regno) && TARGET_SSE)
- return true;
- }
- else
- {
- if (TARGET_SSE && SSE_REGNO_P (regno)
- && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
- return true;
- }
- /* RAX is used as hidden argument to va_arg functions. */
- if (!regno)
- return true;
- for (i = 0; i < REGPARM_MAX; i++)
- if (regno == x86_64_int_parameter_registers[i])
- return true;
- return false;
-}
-
-/* Return true if we do not know how to pass TYPE solely in registers.  */
-
-static bool
-ix86_must_pass_in_stack (enum machine_mode mode, tree type)
-{
- if (must_pass_in_stack_var_size_or_pad (mode, type))
- return true;
-
- /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
- The layout_type routine is crafty and tries to trick us into passing
- currently unsupported vector types on the stack by using TImode. */
- return (!TARGET_64BIT && mode == TImode
- && type && TREE_CODE (type) != VECTOR_TYPE);
-}
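-
-/* For example, in 32-bit mode a 16-byte non-vector TImode value is forced
-   onto the stack by the check above, while a vector type that layout_type
-   mapped to TImode is exempted so the normal vector passing rules still
-   apply to it.  */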
-
-/* Initialize a variable CUM of type CUMULATIVE_ARGS
- for a call to a function whose data type is FNTYPE.
- For a library call, FNTYPE is 0. */
-
-void
-init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
- tree fntype, /* tree ptr for function decl */
- rtx libname, /* SYMBOL_REF of library name or 0 */
- tree fndecl)
-{
- static CUMULATIVE_ARGS zero_cum;
- tree param, next_param;
-
- if (TARGET_DEBUG_ARG)
- {
- fprintf (stderr, "\ninit_cumulative_args (");
- if (fntype)
- fprintf (stderr, "fntype code = %s, ret code = %s",
- tree_code_name[(int) TREE_CODE (fntype)],
- tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
- else
- fprintf (stderr, "no fntype");
-
- if (libname)
- fprintf (stderr, ", libname = %s", XSTR (libname, 0));
- }
-
- *cum = zero_cum;
-
- /* Set up the number of registers to use for passing arguments. */
- cum->nregs = ix86_regparm;
- if (TARGET_SSE)
- cum->sse_nregs = SSE_REGPARM_MAX;
- if (TARGET_MMX)
- cum->mmx_nregs = MMX_REGPARM_MAX;
- cum->warn_sse = true;
- cum->warn_mmx = true;
- cum->maybe_vaarg = false;
-
- /* Use ecx and edx registers if function has fastcall attribute,
- else look for regparm information. */
- if (fntype && !TARGET_64BIT)
- {
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
- {
- cum->nregs = 2;
- cum->fastcall = 1;
- }
- else
- cum->nregs = ix86_function_regparm (fntype, fndecl);
- }
-
- /* Set up the number of SSE registers used for passing SFmode
- and DFmode arguments. Warn for mismatching ABI. */
- cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
-
- /* Determine if this function has variable arguments. This is
-     indicated by the last argument being 'void_type_node' if there
- are no variable arguments. If there are variable arguments, then
- we won't pass anything in registers in 32-bit mode. */
-
- if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
- {
- for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
- param != 0; param = next_param)
- {
- next_param = TREE_CHAIN (param);
- if (next_param == 0 && TREE_VALUE (param) != void_type_node)
- {
- if (!TARGET_64BIT)
- {
- cum->nregs = 0;
- cum->sse_nregs = 0;
- cum->mmx_nregs = 0;
- cum->warn_sse = 0;
- cum->warn_mmx = 0;
- cum->fastcall = 0;
- cum->float_in_sse = 0;
- }
- cum->maybe_vaarg = true;
- }
- }
- }
- if ((!fntype && !libname)
- || (fntype && !TYPE_ARG_TYPES (fntype)))
- cum->maybe_vaarg = true;
-
- if (TARGET_DEBUG_ARG)
- fprintf (stderr, ", nregs=%d )\n", cum->nregs);
-
- return;
-}
-
-/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
- But in the case of vector types, it is some vector mode.
-
- When we have only some of our vector isa extensions enabled, then there
- are some modes for which vector_mode_supported_p is false. For these
- modes, the generic vector support in gcc will choose some non-vector mode
- in order to implement the type. By computing the natural mode, we'll
- select the proper ABI location for the operand and not depend on whatever
- the middle-end decides to do with these vector types. */
-
-static enum machine_mode
-type_natural_mode (tree type)
-{
- enum machine_mode mode = TYPE_MODE (type);
-
- if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
- {
- HOST_WIDE_INT size = int_size_in_bytes (type);
- if ((size == 8 || size == 16)
- /* ??? Generic code allows us to create width 1 vectors. Ignore. */
- && TYPE_VECTOR_SUBPARTS (type) > 1)
- {
- enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
-
- if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
- mode = MIN_MODE_VECTOR_FLOAT;
- else
- mode = MIN_MODE_VECTOR_INT;
-
- /* Get the mode which has this inner mode and number of units. */
- for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
- if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
- && GET_MODE_INNER (mode) == innermode)
- return mode;
-
- gcc_unreachable ();
- }
- }
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- /* Pass V1DImode objects as DImode. This is for compatibility. */
- if (TREE_CODE (type) == VECTOR_TYPE && mode == V1DImode && !TARGET_64BIT)
- return DImode;
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
-
- return mode;
-}
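-
-/* For example, a GNU vector type such as
-     typedef float v4sf __attribute__ ((vector_size (16)));
-   gets the natural mode V4SFmode from the loop above even when SSE is
-   disabled and the middle end would have fallen back to a non-vector mode,
-   so the ABI slot chosen for the argument does not depend on -msse.  */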
-
-/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
- this may not agree with the mode that the type system has chosen for the
- register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
- go ahead and use it. Otherwise we have to build a PARALLEL instead. */
-
-static rtx
-gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
- unsigned int regno)
-{
- rtx tmp;
-
- if (orig_mode != BLKmode)
- tmp = gen_rtx_REG (orig_mode, regno);
- else
- {
- tmp = gen_rtx_REG (mode, regno);
- tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
- tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
- }
-
- return tmp;
-}
-
-/* x86-64 register passing implementation. See the x86-64 ABI for details.
- The goal of this code is to classify each eightbyte of an incoming argument
- by register class and assign registers accordingly. */
-
-/* Return the union class of CLASS1 and CLASS2.
- See the x86-64 PS ABI for details. */
-
-static enum x86_64_reg_class
-merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
-{
- /* Rule #1: If both classes are equal, this is the resulting class. */
- if (class1 == class2)
- return class1;
-
- /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
- the other class. */
- if (class1 == X86_64_NO_CLASS)
- return class2;
- if (class2 == X86_64_NO_CLASS)
- return class1;
-
- /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
- if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
- return X86_64_MEMORY_CLASS;
-
- /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
- if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
- || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
- return X86_64_INTEGERSI_CLASS;
- if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
- || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
- return X86_64_INTEGER_CLASS;
-
- /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
- MEMORY is used. */
- if (class1 == X86_64_X87_CLASS
- || class1 == X86_64_X87UP_CLASS
- || class1 == X86_64_COMPLEX_X87_CLASS
- || class2 == X86_64_X87_CLASS
- || class2 == X86_64_X87UP_CLASS
- || class2 == X86_64_COMPLEX_X87_CLASS)
- return X86_64_MEMORY_CLASS;
-
- /* Rule #6: Otherwise class SSE is used. */
- return X86_64_SSE_CLASS;
-}
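-
-/* A worked example (illustrative only): for
- union u { int i; float f; };
- both members occupy the same eightbyte; the int yields INTEGERSI and the
- float yields SSESF, so rule #4 merges them to X86_64_INTEGERSI_CLASS and
- the union is passed in a general-purpose register. */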
-
-/* Classify the argument of type TYPE and mode MODE.
- CLASSES will be filled by the register class used to pass each word
- of the operand. The number of words is returned. In case the parameter
- should be passed in memory, 0 is returned. As a special case for zero
- sized containers, classes[0] will be NO_CLASS and 1 is returned.
-
- BIT_OFFSET is used internally for handling records; it gives the offset
- of the argument within the enclosing record, in bits modulo 256, to avoid
- overflow cases.
-
- See the x86-64 PS ABI for details.
-*/
-
-static int
-classify_argument (enum machine_mode mode, tree type,
- enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
-{
- HOST_WIDE_INT bytes =
- (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
- int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
- /* Variable sized entities are always passed/returned in memory. */
- if (bytes < 0)
- return 0;
-
- if (mode != VOIDmode
- && targetm.calls.must_pass_in_stack (mode, type))
- return 0;
-
- if (type && AGGREGATE_TYPE_P (type))
- {
- int i;
- tree field;
- enum x86_64_reg_class subclasses[MAX_CLASSES];
-
- /* On x86-64 we pass structures larger than 16 bytes on the stack. */
- if (bytes > 16)
- return 0;
-
- for (i = 0; i < words; i++)
- classes[i] = X86_64_NO_CLASS;
-
- /* Zero sized arrays or structures are NO_CLASS. We return 0 to
- signal the memory class, so handle this as a special case. */
- if (!words)
- {
- classes[0] = X86_64_NO_CLASS;
- return 1;
- }
-
- /* Classify each field of record and merge classes. */
- switch (TREE_CODE (type))
- {
- case RECORD_TYPE:
- /* For C++ classes, first merge in the fields of the base classes. */
- if (TYPE_BINFO (type))
- {
- tree binfo, base_binfo;
- int basenum;
-
- for (binfo = TYPE_BINFO (type), basenum = 0;
- BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
- {
- int num;
- int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
- tree type = BINFO_TYPE (base_binfo);
-
- num = classify_argument (TYPE_MODE (type),
- type, subclasses,
- (offset + bit_offset) % 256);
- if (!num)
- return 0;
- for (i = 0; i < num; i++)
- {
- int pos = (offset + (bit_offset % 64)) / 8 / 8;
- classes[i + pos] =
- merge_classes (subclasses[i], classes[i + pos]);
- }
- }
- }
- /* And now merge the fields of the structure itself. */
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
- {
- if (TREE_CODE (field) == FIELD_DECL)
- {
- int num;
-
- if (TREE_TYPE (field) == error_mark_node)
- continue;
-
- /* Bitfields are always classified as integer. Handle them
- early, since later code would consider them to be
- misaligned integers. */
- if (DECL_BIT_FIELD (field))
- {
- for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
- i < ((int_bit_position (field) + (bit_offset % 64))
- + tree_low_cst (DECL_SIZE (field), 0)
- + 63) / 8 / 8; i++)
- classes[i] =
- merge_classes (X86_64_INTEGER_CLASS,
- classes[i]);
- }
- else
- {
- num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
- TREE_TYPE (field), subclasses,
- (int_bit_position (field)
- + bit_offset) % 256);
- if (!num)
- return 0;
- for (i = 0; i < num; i++)
- {
- int pos =
- (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
- classes[i + pos] =
- merge_classes (subclasses[i], classes[i + pos]);
- }
- }
- }
- }
- break;
-
- case ARRAY_TYPE:
- /* Arrays are handled as small records. */
- {
- int num;
- num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
- TREE_TYPE (type), subclasses, bit_offset);
- if (!num)
- return 0;
-
- /* The partial classes are now full classes. */
- if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
- subclasses[0] = X86_64_SSE_CLASS;
- if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
- subclasses[0] = X86_64_INTEGER_CLASS;
-
- for (i = 0; i < words; i++)
- classes[i] = subclasses[i % num];
-
- break;
- }
- case UNION_TYPE:
- case QUAL_UNION_TYPE:
- /* Unions are similar to RECORD_TYPE, but the offset is always 0. */
-
- /* Unions are not derived. */
- gcc_assert (!TYPE_BINFO (type)
- || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
- {
- if (TREE_CODE (field) == FIELD_DECL)
- {
- int num;
-
- if (TREE_TYPE (field) == error_mark_node)
- continue;
-
- num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
- TREE_TYPE (field), subclasses,
- bit_offset);
- if (!num)
- return 0;
- for (i = 0; i < num; i++)
- classes[i] = merge_classes (subclasses[i], classes[i]);
- }
- }
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Final merger cleanup. */
- for (i = 0; i < words; i++)
- {
- /* If one class is MEMORY, everything should be passed in
- memory. */
- if (classes[i] == X86_64_MEMORY_CLASS)
- return 0;
-
- /* The X86_64_SSEUP_CLASS should be always preceded by
- X86_64_SSE_CLASS. */
- if (classes[i] == X86_64_SSEUP_CLASS
- && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
- classes[i] = X86_64_SSE_CLASS;
-
- /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
- if (classes[i] == X86_64_X87UP_CLASS
- && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
- classes[i] = X86_64_SSE_CLASS;
- }
- return words;
- }
-
- /* Compute the alignment needed. We align all types to their natural
- boundaries, except XFmode, whose alignment is taken as 128 bits (its
- ABI alignment) rather than its 80-bit mode size. */
- if (mode != VOIDmode && mode != BLKmode)
- {
- int mode_alignment = GET_MODE_BITSIZE (mode);
-
- if (mode == XFmode)
- mode_alignment = 128;
- else if (mode == XCmode)
- mode_alignment = 256;
- if (COMPLEX_MODE_P (mode))
- mode_alignment /= 2;
- /* Misaligned fields are always returned in memory. */
- if (bit_offset % mode_alignment)
- return 0;
- }
-
- /* For V1xx modes, just use the base mode. */
- if (VECTOR_MODE_P (mode)
- && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
- mode = GET_MODE_INNER (mode);
-
- /* Classification of atomic types. */
- switch (mode)
- {
- case SDmode:
- case DDmode:
- classes[0] = X86_64_SSE_CLASS;
- return 1;
- case TDmode:
- classes[0] = X86_64_SSE_CLASS;
- classes[1] = X86_64_SSEUP_CLASS;
- return 2;
- case DImode:
- case SImode:
- case HImode:
- case QImode:
- case CSImode:
- case CHImode:
- case CQImode:
- if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
- classes[0] = X86_64_INTEGERSI_CLASS;
- else
- classes[0] = X86_64_INTEGER_CLASS;
- return 1;
- case CDImode:
- case TImode:
- classes[0] = classes[1] = X86_64_INTEGER_CLASS;
- return 2;
- case CTImode:
- return 0;
- case SFmode:
- if (!(bit_offset % 64))
- classes[0] = X86_64_SSESF_CLASS;
- else
- classes[0] = X86_64_SSE_CLASS;
- return 1;
- case DFmode:
- classes[0] = X86_64_SSEDF_CLASS;
- return 1;
- case XFmode:
- classes[0] = X86_64_X87_CLASS;
- classes[1] = X86_64_X87UP_CLASS;
- return 2;
- case TFmode:
- classes[0] = X86_64_SSE_CLASS;
- classes[1] = X86_64_SSEUP_CLASS;
- return 2;
- case SCmode:
- classes[0] = X86_64_SSE_CLASS;
- return 1;
- case DCmode:
- classes[0] = X86_64_SSEDF_CLASS;
- classes[1] = X86_64_SSEDF_CLASS;
- return 2;
- case XCmode:
- classes[0] = X86_64_COMPLEX_X87_CLASS;
- return 1;
- case TCmode:
- /* This mode is larger than 16 bytes. */
- return 0;
- case V4SFmode:
- case V4SImode:
- case V16QImode:
- case V8HImode:
- case V2DFmode:
- case V2DImode:
- classes[0] = X86_64_SSE_CLASS;
- classes[1] = X86_64_SSEUP_CLASS;
- return 2;
- case V2SFmode:
- case V2SImode:
- case V4HImode:
- case V8QImode:
- classes[0] = X86_64_SSE_CLASS;
- return 1;
- case BLKmode:
- case VOIDmode:
- return 0;
- default:
- gcc_assert (VECTOR_MODE_P (mode));
-
- if (bytes > 16)
- return 0;
-
- gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
-
- if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
- classes[0] = X86_64_INTEGERSI_CLASS;
- else
- classes[0] = X86_64_INTEGER_CLASS;
- classes[1] = X86_64_INTEGER_CLASS;
- return 1 + (bytes > 8);
- }
-}
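-
-/* A two-eightbyte sketch (illustrative only): for
- struct s { double d; long l; };
- classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS (the
- double) and classes[1] = X86_64_INTEGER_CLASS (the long), so the struct
- is split between one SSE register and one integer register. */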
-
-/* Examine the argument and set the number of registers required in each
- class. Return 0 iff the parameter should be passed in memory. */
-static int
-examine_argument (enum machine_mode mode, tree type, int in_return,
- int *int_nregs, int *sse_nregs)
-{
- enum x86_64_reg_class class[MAX_CLASSES];
- int n = classify_argument (mode, type, class, 0);
-
- *int_nregs = 0;
- *sse_nregs = 0;
- if (!n)
- return 0;
- for (n--; n >= 0; n--)
- switch (class[n])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- (*int_nregs)++;
- break;
- case X86_64_SSE_CLASS:
- case X86_64_SSESF_CLASS:
- case X86_64_SSEDF_CLASS:
- (*sse_nregs)++;
- break;
- case X86_64_NO_CLASS:
- case X86_64_SSEUP_CLASS:
- break;
- case X86_64_X87_CLASS:
- case X86_64_X87UP_CLASS:
- if (!in_return)
- return 0;
- break;
- case X86_64_COMPLEX_X87_CLASS:
- return in_return ? 2 : 0;
- case X86_64_MEMORY_CLASS:
- gcc_unreachable ();
- }
- return 1;
-}
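-
-/* Typical use (a sketch of a hypothetical caller): decide register vs.
- stack and count the register needs in one call:
- int int_nregs, sse_nregs;
- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
- ... pass the argument in memory ...
- else if (int_nregs <= cum->nregs && sse_nregs <= cum->sse_nregs)
- ... pass it in registers ...
- This is the shape of the logic in function_arg_advance below. */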
-
-/* Construct container for the argument used by GCC interface. See
- FUNCTION_ARG for the detailed description. */
-
-static rtx
-construct_container (enum machine_mode mode, enum machine_mode orig_mode,
- tree type, int in_return, int nintregs, int nsseregs,
- const int *intreg, int sse_regno)
-{
- /* The following variables hold the static issued_error state. */
- static bool issued_sse_arg_error;
- static bool issued_sse_ret_error;
- static bool issued_x87_ret_error;
-
- enum machine_mode tmpmode;
- int bytes =
- (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
- enum x86_64_reg_class class[MAX_CLASSES];
- int n;
- int i;
- int nexps = 0;
- int needed_sseregs, needed_intregs;
- rtx exp[MAX_CLASSES];
- rtx ret;
-
- n = classify_argument (mode, type, class, 0);
- if (TARGET_DEBUG_ARG)
- {
- if (!n)
- fprintf (stderr, "Memory class\n");
- else
- {
- fprintf (stderr, "Classes:");
- for (i = 0; i < n; i++)
- {
- fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
- }
- fprintf (stderr, "\n");
- }
- }
- if (!n)
- return NULL;
- if (!examine_argument (mode, type, in_return, &needed_intregs,
- &needed_sseregs))
- return NULL;
- if (needed_intregs > nintregs || needed_sseregs > nsseregs)
- return NULL;
-
- /* We allowed the user to turn off SSE for kernel mode. Don't crash if
- some less clueful developer tries to use floating-point anyway. */
- if (needed_sseregs && !TARGET_SSE)
- {
- if (in_return)
- {
- if (!issued_sse_ret_error)
- {
- error ("SSE register return with SSE disabled");
- issued_sse_ret_error = true;
- }
- }
- else if (!issued_sse_arg_error)
- {
- error ("SSE register argument with SSE disabled");
- issued_sse_arg_error = true;
- }
- return NULL;
- }
-
- /* Likewise, error if the ABI requires us to return values in the
- x87 registers and the user specified -mno-80387. */
- if (!TARGET_80387 && in_return)
- for (i = 0; i < n; i++)
- if (class[i] == X86_64_X87_CLASS
- || class[i] == X86_64_X87UP_CLASS
- || class[i] == X86_64_COMPLEX_X87_CLASS)
- {
- if (!issued_x87_ret_error)
- {
- error ("x87 register return with x87 disabled");
- issued_x87_ret_error = true;
- }
- return NULL;
- }
-
- /* First construct the simple cases. Avoid SCmode, since we want to use
- a single register to pass this type. */
- if (n == 1 && mode != SCmode)
- switch (class[0])
- {
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- return gen_rtx_REG (mode, intreg[0]);
- case X86_64_SSE_CLASS:
- case X86_64_SSESF_CLASS:
- case X86_64_SSEDF_CLASS:
- return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
- case X86_64_X87_CLASS:
- case X86_64_COMPLEX_X87_CLASS:
- return gen_rtx_REG (mode, FIRST_STACK_REG);
- case X86_64_NO_CLASS:
- /* Zero sized array, struct or class. */
- return NULL;
- default:
- gcc_unreachable ();
- }
- if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
- && mode != BLKmode)
- return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
- if (n == 2
- && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
- return gen_rtx_REG (XFmode, FIRST_STACK_REG);
- if (n == 2 && class[0] == X86_64_INTEGER_CLASS
- && class[1] == X86_64_INTEGER_CLASS
- && (mode == CDImode || mode == TImode || mode == TFmode)
- && intreg[0] + 1 == intreg[1])
- return gen_rtx_REG (mode, intreg[0]);
-
- /* Otherwise figure out the entries of the PARALLEL. */
- for (i = 0; i < n; i++)
- {
- switch (class[i])
- {
- case X86_64_NO_CLASS:
- break;
- case X86_64_INTEGER_CLASS:
- case X86_64_INTEGERSI_CLASS:
- /* Merge TImodes on aligned occasions here too. */
- if (i * 8 + 8 > bytes)
- tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
- else if (class[i] == X86_64_INTEGERSI_CLASS)
- tmpmode = SImode;
- else
- tmpmode = DImode;
- /* We've requested 24 bytes for which there is no mode. Use DImode. */
- if (tmpmode == BLKmode)
- tmpmode = DImode;
- exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (tmpmode, *intreg),
- GEN_INT (i*8));
- intreg++;
- break;
- case X86_64_SSESF_CLASS:
- exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SFmode,
- SSE_REGNO (sse_regno)),
- GEN_INT (i*8));
- sse_regno++;
- break;
- case X86_64_SSEDF_CLASS:
- exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (DFmode,
- SSE_REGNO (sse_regno)),
- GEN_INT (i*8));
- sse_regno++;
- break;
- case X86_64_SSE_CLASS:
- if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
- tmpmode = TImode;
- else
- tmpmode = DImode;
- exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (tmpmode,
- SSE_REGNO (sse_regno)),
- GEN_INT (i*8));
- if (tmpmode == TImode)
- i++;
- sse_regno++;
- break;
- default:
- gcc_unreachable ();
- }
- }
-
- /* Empty aligned struct, union or class. */
- if (nexps == 0)
- return NULL;
-
- ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
- for (i = 0; i < nexps; i++)
- XVECEXP (ret, 0, i) = exp [i];
- return ret;
-}
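-
-/* For the struct { double; long; } example above, the PARALLEL built here
- looks like (a sketch; the register numbers depend on INTREG/SSE_REGNO):
- (parallel [(expr_list (reg:DF xmm0) (const_int 0))
- (expr_list (reg:DI di) (const_int 8))])
- i.e. byte 0 of the value travels in an SSE register and byte 8 in a
- general-purpose register. */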
-
-/* Update the data in CUM to advance over an argument
- of mode MODE and data type TYPE.
- (TYPE is null for libcalls where that information may not be available.) */
-
-void
-function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, int named)
-{
- int bytes =
- (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
- int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
- if (type)
- mode = type_natural_mode (type);
-
- if (TARGET_DEBUG_ARG)
- fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
- "mode=%s, named=%d)\n\n",
- words, cum->words, cum->nregs, cum->sse_nregs,
- GET_MODE_NAME (mode), named);
-
- if (TARGET_64BIT)
- {
- int int_nregs, sse_nregs;
- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
- cum->words += words;
- else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
- {
- cum->nregs -= int_nregs;
- cum->sse_nregs -= sse_nregs;
- cum->regno += int_nregs;
- cum->sse_regno += sse_nregs;
- }
- else
- cum->words += words;
- }
- else
- {
- switch (mode)
- {
- default:
- break;
-
- case BLKmode:
- if (bytes < 0)
- break;
- /* FALLTHRU */
-
- case DImode:
- case SImode:
- case HImode:
- case QImode:
- cum->words += words;
- cum->nregs -= words;
- cum->regno += words;
-
- if (cum->nregs <= 0)
- {
- cum->nregs = 0;
- cum->regno = 0;
- }
- break;
-
- case DFmode:
- if (cum->float_in_sse < 2)
- break;
- case SFmode:
- if (cum->float_in_sse < 1)
- break;
- /* FALLTHRU */
-
- case TImode:
- case V16QImode:
- case V8HImode:
- case V4SImode:
- case V2DImode:
- case V4SFmode:
- case V2DFmode:
- if (!type || !AGGREGATE_TYPE_P (type))
- {
- cum->sse_words += words;
- cum->sse_nregs -= 1;
- cum->sse_regno += 1;
- if (cum->sse_nregs <= 0)
- {
- cum->sse_nregs = 0;
- cum->sse_regno = 0;
- }
- }
- break;
-
- case V8QImode:
- case V4HImode:
- case V2SImode:
- case V2SFmode:
- if (!type || !AGGREGATE_TYPE_P (type))
- {
- cum->mmx_words += words;
- cum->mmx_nregs -= 1;
- cum->mmx_regno += 1;
- if (cum->mmx_nregs <= 0)
- {
- cum->mmx_nregs = 0;
- cum->mmx_regno = 0;
- }
- }
- break;
- }
- }
-}
-
-/* Define where to put the arguments to a function.
- Value is zero to push the argument on the stack,
- or a hard register in which to store the argument.
-
- MODE is the argument's machine mode.
- TYPE is the data type of the argument (as a tree).
- This is null for libcalls where that information may
- not be available.
- CUM is a variable of type CUMULATIVE_ARGS which gives info about
- the preceding args and about the function being called.
- NAMED is nonzero if this argument is a named parameter
- (otherwise it is an extra parameter matching an ellipsis). */
-
-rtx
-function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
- tree type, int named)
-{
- enum machine_mode mode = orig_mode;
- rtx ret = NULL_RTX;
- int bytes =
- (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
- int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- static bool warnedsse, warnedmmx;
-
- /* To simplify the code below, represent vector types with a vector mode
- even if MMX/SSE are not active. */
- if (type && TREE_CODE (type) == VECTOR_TYPE)
- mode = type_natural_mode (type);
-
- /* Handle a hidden AL argument containing the number of SSE registers used
- for varargs x86-64 functions. For the i386 ABI, just return constm1_rtx
- to avoid any AL settings. */
- if (mode == VOIDmode)
- {
- if (TARGET_64BIT)
- return GEN_INT (cum->maybe_vaarg
- ? (cum->sse_nregs < 0
- ? SSE_REGPARM_MAX
- : cum->sse_regno)
- : -1);
- else
- return constm1_rtx;
- }
- if (TARGET_64BIT)
- ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
- cum->sse_nregs,
- &x86_64_int_parameter_registers [cum->regno],
- cum->sse_regno);
- else
- switch (mode)
- {
- /* For now, pass fp/complex values on the stack. */
- default:
- break;
-
- case BLKmode:
- if (bytes < 0)
- break;
- /* FALLTHRU */
- case DImode:
- case SImode:
- case HImode:
- case QImode:
- if (words <= cum->nregs)
- {
- int regno = cum->regno;
-
- /* Fastcall allocates the first two DWORD (SImode) or
- smaller arguments to ECX and EDX. */
- if (cum->fastcall)
- {
- if (mode == BLKmode || mode == DImode)
- break;
-
- /* ECX, not EAX, is the first allocated register. */
- if (regno == 0)
- regno = 2;
- }
- ret = gen_rtx_REG (mode, regno);
- }
- break;
- case DFmode:
- if (cum->float_in_sse < 2)
- break;
- case SFmode:
- if (cum->float_in_sse < 1)
- break;
- /* FALLTHRU */
- case TImode:
- case V16QImode:
- case V8HImode:
- case V4SImode:
- case V2DImode:
- case V4SFmode:
- case V2DFmode:
- if (!type || !AGGREGATE_TYPE_P (type))
- {
- if (!TARGET_SSE && !warnedsse && cum->warn_sse)
- {
- warnedsse = true;
- warning (0, "SSE vector argument without SSE enabled "
- "changes the ABI");
- }
- if (cum->sse_nregs)
- ret = gen_reg_or_parallel (mode, orig_mode,
- cum->sse_regno + FIRST_SSE_REG);
- }
- break;
- case V8QImode:
- case V4HImode:
- case V2SImode:
- case V2SFmode:
- if (!type || !AGGREGATE_TYPE_P (type))
- {
- if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
- {
- warnedmmx = true;
- warning (0, "MMX vector argument without MMX enabled "
- "changes the ABI");
- }
- if (cum->mmx_nregs)
- ret = gen_reg_or_parallel (mode, orig_mode,
- cum->mmx_regno + FIRST_MMX_REG);
- }
- break;
- }
-
- if (TARGET_DEBUG_ARG)
- {
- fprintf (stderr,
- "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
- words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
-
- if (ret)
- print_simple_rtl (stderr, ret);
- else
- fprintf (stderr, ", stack");
-
- fprintf (stderr, " )\n");
- }
-
- return ret;
-}
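-
-/* A sketch of how the middle-end drives the two hooks above (not code
- from this file):
- CUMULATIVE_ARGS cum;
- INIT_CUMULATIVE_ARGS (cum, fntype, libname, fndecl);
- for each argument:
- rtx reg = FUNCTION_ARG (cum, mode, type, named);
- ... use REG, or push the argument on the stack when it is zero ...
- FUNCTION_ARG_ADVANCE (cum, mode, type, named);
- On i386 those macros expand to function_arg and function_arg_advance. */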
-
-/* A C expression that indicates when an argument must be passed by
- reference. If nonzero for an argument, a copy of that argument is
- made in memory and a pointer to the argument is passed instead of
- the argument itself. The pointer is passed in whatever way is
- appropriate for passing a pointer to that type. */
-
-static bool
-ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- tree type, bool named ATTRIBUTE_UNUSED)
-{
- if (!TARGET_64BIT)
- return 0;
-
- if (type && int_size_in_bytes (type) == -1)
- {
- if (TARGET_DEBUG_ARG)
- fprintf (stderr, "function_arg_pass_by_reference\n");
- return 1;
- }
-
- return 0;
-}
-
-/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
- passing ABI. Only called if TARGET_SSE. */
-static bool
-contains_128bit_aligned_vector_p (tree type)
-{
- enum machine_mode mode = TYPE_MODE (type);
- if (SSE_REG_MODE_P (mode)
- && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
- return true;
- if (TYPE_ALIGN (type) < 128)
- return false;
-
- if (AGGREGATE_TYPE_P (type))
- {
- /* Walk the aggregates recursively. */
- switch (TREE_CODE (type))
- {
- case RECORD_TYPE:
- case UNION_TYPE:
- case QUAL_UNION_TYPE:
- {
- tree field;
-
- if (TYPE_BINFO (type))
- {
- tree binfo, base_binfo;
- int i;
-
- for (binfo = TYPE_BINFO (type), i = 0;
- BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
- if (contains_128bit_aligned_vector_p
- (BINFO_TYPE (base_binfo)))
- return true;
- }
- /* And now merge the fields of structure. */
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
- {
- if (TREE_CODE (field) == FIELD_DECL
- && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
- return true;
- }
- break;
- }
-
- case ARRAY_TYPE:
- /* Just in case some language passes arrays by value. */
- if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
- return true;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
- return false;
-}
-
-/* Gives the alignment boundary, in bits, of an argument with the
- specified mode and type. */
-
-int
-ix86_function_arg_boundary (enum machine_mode mode, tree type)
-{
- int align;
- if (type)
- align = TYPE_ALIGN (type);
- else
- align = GET_MODE_ALIGNMENT (mode);
- /* APPLE LOCAL begin unbreak ppc64 abi 5103220 */
- if (type && integer_zerop (TYPE_SIZE (type)))
- align = PARM_BOUNDARY;
- /* APPLE LOCAL end unbreak ppc64 abi 5103220 */
- if (align < PARM_BOUNDARY)
- align = PARM_BOUNDARY;
- if (!TARGET_64BIT)
- {
- /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
- make an exception for SSE modes, since these require 128-bit
- alignment.
-
- The handling here differs from field_alignment. ICC aligns MMX
- arguments to 4 byte boundaries, while structure fields are aligned
- to 8 byte boundaries. */
- if (!TARGET_SSE)
- align = PARM_BOUNDARY;
- else if (!type)
- {
- if (!SSE_REG_MODE_P (mode))
- align = PARM_BOUNDARY;
- }
- else
- {
- if (!contains_128bit_aligned_vector_p (type))
- align = PARM_BOUNDARY;
- }
- }
- if (align > 128)
- align = 128;
- return align;
-}
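-
-/* Examples (illustrative): in 32-bit mode a plain int gets PARM_BOUNDARY
- (32 bits); a 16-byte vector such as __m128 gets 128 bits when SSE is
- enabled; in 64-bit mode a long double keeps its natural 128-bit
- boundary, since the result is capped at 128. */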
-
-/* Return true if REGNO is a possible register number for a function value. */
-bool
-ix86_function_value_regno_p (int regno)
-{
- if (TARGET_MACHO)
- {
- if (!TARGET_64BIT)
- {
- return ((regno) == 0
- || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
- || ((regno) == FIRST_SSE_REG && TARGET_SSE));
- }
- return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
- || ((regno) == FIRST_SSE_REG && TARGET_SSE)
- || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
- }
- else
- {
- if (regno == 0
- || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
- || (regno == FIRST_SSE_REG && TARGET_SSE))
- return true;
-
- if (!TARGET_64BIT
- && (regno == FIRST_MMX_REG && TARGET_MMX))
- return true;
-
- return false;
- }
-}
-
-/* Define how to find the value returned by a function.
- VALTYPE is the data type of the value (as a tree).
- If the precise function being called is known, FUNC is its FUNCTION_DECL;
- otherwise, FUNC is 0. */
-rtx
-ix86_function_value (tree valtype, tree fntype_or_decl,
- bool outgoing ATTRIBUTE_UNUSED)
-{
- enum machine_mode natmode = type_natural_mode (valtype);
-
- if (TARGET_64BIT)
- {
- rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
- 1, REGPARM_MAX, SSE_REGPARM_MAX,
- x86_64_int_return_registers, 0);
- /* For zero sized structures, construct_container returns NULL, but we
- need to keep the rest of the compiler happy by returning a meaningful
- value. */
- if (!ret)
- ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
- return ret;
- }
- else
- {
- tree fn = NULL_TREE, fntype;
- if (fntype_or_decl
- && DECL_P (fntype_or_decl))
- fn = fntype_or_decl;
- fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
- return gen_rtx_REG (TYPE_MODE (valtype),
- ix86_value_regno (natmode, fn, fntype));
- }
-}
-
-/* APPLE LOCAL begin radar 4781080 */
-/* Return true iff we must generate a call to objcMsgSend for an
- fp-returning method. */
-bool
-ix86_objc_fpreturn_msgcall (tree ret_type, bool no_long_double)
-{
- if (no_long_double)
- return TARGET_64BIT && SCALAR_FLOAT_TYPE_P (ret_type)
- && TYPE_MODE (ret_type) != XFmode;
- else
- return SCALAR_FLOAT_TYPE_P (ret_type);
-}
-/* APPLE LOCAL end radar 4781080 */
-
-/* Return true iff type is returned in memory. */
-int
-ix86_return_in_memory (tree type)
-{
- int needed_intregs, needed_sseregs, size;
- enum machine_mode mode = type_natural_mode (type);
-
- if (TARGET_64BIT)
- return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
-
- if (mode == BLKmode)
- return 1;
-
- size = int_size_in_bytes (type);
-
- if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
- return 0;
-
- if (VECTOR_MODE_P (mode) || mode == TImode)
- {
- /* User-created vectors small enough to fit in EAX. */
- if (size < 8)
- return 0;
-
- /* MMX/3dNow values are returned in MM0,
- except when it doesn't exist. */
- if (size == 8)
- /* APPLE LOCAL begin radar 4875125. */
- /* Undo the mainline patch which broke MACHO ABI compatibility. */
- return (TARGET_MACHO) ? 1 : (TARGET_MMX ? 0 : 1);
- /* APPLE LOCAL end radar 4875125. */
-
- /* SSE values are returned in XMM0, except when it doesn't exist. */
- if (size == 16)
- return (TARGET_SSE ? 0 : 1);
- }
-
- if (mode == XFmode)
- return 0;
-
- if (mode == TDmode)
- return 1;
-
- if (size > 12)
- return 1;
- return 0;
-}
-
-/* When returning SSE vector types, we have a choice of either
- (1) being ABI incompatible with a -march switch, or
- (2) generating an error.
- Given no good solution, I think the safest thing is one warning.
- The user won't be able to use -Werror, but....
-
- Choose the STRUCT_VALUE_RTX hook because that's (at present) only
- called in response to actually generating a caller or callee that
- uses such a type. As opposed to RETURN_IN_MEMORY, which is called
- via aggregate_value_p for general type probing from tree-ssa. */
-
-static rtx
-ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
-{
- static bool warnedsse, warnedmmx;
-
- if (type)
- {
- /* Look at the return type of the function, not the function type. */
- enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
-
- if (!TARGET_SSE && !warnedsse)
- {
- if (mode == TImode
- || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- {
- warnedsse = true;
- warning (0, "SSE vector return without SSE enabled "
- "changes the ABI");
- }
- }
-
- if (!TARGET_MMX && !warnedmmx)
- {
- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
- {
- warnedmmx = true;
- warning (0, "MMX vector return without MMX enabled "
- "changes the ABI");
- }
- }
- }
-
- return NULL;
-}
-
-/* Define how to find the value returned by a library function
- assuming the value has mode MODE. */
-rtx
-ix86_libcall_value (enum machine_mode mode)
-{
- if (TARGET_64BIT)
- {
- switch (mode)
- {
- case SFmode:
- case SCmode:
- case DFmode:
- case DCmode:
- case TFmode:
- case SDmode:
- case DDmode:
- case TDmode:
- return gen_rtx_REG (mode, FIRST_SSE_REG);
- case XFmode:
- case XCmode:
- return gen_rtx_REG (mode, FIRST_FLOAT_REG);
- case TCmode:
- return NULL;
- default:
- return gen_rtx_REG (mode, 0);
- }
- }
- else
- return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
-}
-
-/* Given a mode, return the register to use for a return value. */
-
-static int
-ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
-{
- gcc_assert (!TARGET_64BIT);
-
- /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
- we normally prevent this case when MMX is not available. However
- some ABIs may require the result to be returned like DImode. */
- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
- return TARGET_MMX ? FIRST_MMX_REG : 0;
-
- /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
- we prevent this case when SSE is not available. However some ABIs
- may require the result to be returned like integer TImode. */
- if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- return TARGET_SSE ? FIRST_SSE_REG : 0;
-
- /* Decimal floating point values can go in %eax, unlike other float modes. */
- if (DECIMAL_FLOAT_MODE_P (mode))
- return 0;
-
- /* APPLE LOCAL begin regparmandstackparm */
- if (SSE_FLOAT_MODE_P(mode)
- && fntype && lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (fntype)))
- return FIRST_SSE_REG;
- /* APPLE LOCAL end regparmandstackparm */
-
- /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
- if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
- return 0;
-
- /* Floating point return values in %st(0), except for local functions when
- SSE math is enabled or for functions with sseregparm attribute. */
- if ((func || fntype)
- && (mode == SFmode || mode == DFmode))
- {
- int sse_level = ix86_function_sseregparm (fntype, func);
- if ((sse_level >= 1 && mode == SFmode)
- || (sse_level == 2 && mode == DFmode))
- return FIRST_SSE_REG;
- }
-
- return FIRST_FLOAT_REG;
-}
-
-/* Create the va_list data type. */
-
-static tree
-ix86_build_builtin_va_list (void)
-{
- tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
-
- /* For i386 we use a plain pointer to the argument area. */
- if (!TARGET_64BIT)
- return build_pointer_type (char_type_node);
-
- record = (*lang_hooks.types.make_type) (RECORD_TYPE);
- type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
-
- f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
- unsigned_type_node);
- f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
- unsigned_type_node);
- f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
- ptr_type_node);
- f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
- ptr_type_node);
-
- va_list_gpr_counter_field = f_gpr;
- va_list_fpr_counter_field = f_fpr;
-
- DECL_FIELD_CONTEXT (f_gpr) = record;
- DECL_FIELD_CONTEXT (f_fpr) = record;
- DECL_FIELD_CONTEXT (f_ovf) = record;
- DECL_FIELD_CONTEXT (f_sav) = record;
-
- TREE_CHAIN (record) = type_decl;
- TYPE_NAME (record) = type_decl;
- TYPE_FIELDS (record) = f_gpr;
- TREE_CHAIN (f_gpr) = f_fpr;
- TREE_CHAIN (f_fpr) = f_ovf;
- TREE_CHAIN (f_ovf) = f_sav;
-
- layout_type (record);
-
- /* The correct type is an array type of one element. */
- return build_array_type (record, build_index_type (size_zero_node));
-}
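-
-/* In C terms, the 64-bit record built above is the x86-64 psABI va_list
- (field names as created above):
- typedef struct {
- unsigned int gp_offset;
- unsigned int fp_offset;
- void *overflow_arg_area;
- void *reg_save_area;
- } __va_list_tag;
- typedef __va_list_tag va_list[1];
- The one-element array is what gives va_list its by-reference behavior. */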
-
-/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
-
-static void
-ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, int *pretend_size ATTRIBUTE_UNUSED,
- int no_rtl)
-{
- CUMULATIVE_ARGS next_cum;
- rtx save_area = NULL_RTX, mem;
- rtx label;
- rtx label_ref;
- rtx tmp_reg;
- rtx nsse_reg;
- int set;
- tree fntype;
- int stdarg_p;
- int i;
-
- if (!TARGET_64BIT)
- return;
-
- if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
- return;
-
- /* Indicate that we need to allocate stack space for the varargs save
- area. */
- ix86_save_varrargs_registers = 1;
-
- cfun->stack_alignment_needed = 128;
-
- fntype = TREE_TYPE (current_function_decl);
- stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
- && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
- != void_type_node));
-
- /* For varargs, we do not want to skip the dummy va_dcl argument.
- For stdargs, we do want to skip the last named argument. */
- next_cum = *cum;
- if (stdarg_p)
- function_arg_advance (&next_cum, mode, type, 1);
-
- if (!no_rtl)
- save_area = frame_pointer_rtx;
-
- set = get_varargs_alias_set ();
-
- for (i = next_cum.regno;
- i < ix86_regparm
- && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
- i++)
- {
- mem = gen_rtx_MEM (Pmode,
- plus_constant (save_area, i * UNITS_PER_WORD));
- MEM_NOTRAP_P (mem) = 1;
- set_mem_alias_set (mem, set);
- emit_move_insn (mem, gen_rtx_REG (Pmode,
- x86_64_int_parameter_registers[i]));
- }
-
- if (next_cum.sse_nregs && cfun->va_list_fpr_size)
- {
- /* Now emit code to save the SSE registers. The AX parameter contains the
- number of SSE parameter registers used to call this function. We use
- the sse_prologue_save insn template, which produces a computed jump
- across the SSE saves. We need some preparation work to get this working. */
-
- label = gen_label_rtx ();
- label_ref = gen_rtx_LABEL_REF (Pmode, label);
-
- /* Compute the address to jump to:
- label - 4*eax + nnamed_sse_arguments*4. */
- tmp_reg = gen_reg_rtx (Pmode);
- nsse_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_MULT (Pmode, nsse_reg,
- GEN_INT (4))));
- if (next_cum.sse_regno)
- emit_move_insn
- (nsse_reg,
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- label_ref,
- GEN_INT (next_cum.sse_regno * 4))));
- else
- emit_move_insn (nsse_reg, label_ref);
- emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
-
- /* Compute the address of the memory block we save into. We always use a
- pointer that points 127 bytes past the first byte to store; this keeps
- each instruction's size within 4 bytes. */
- tmp_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- plus_constant (save_area,
- 8 * REGPARM_MAX + 127)));
- mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
- MEM_NOTRAP_P (mem) = 1;
- set_mem_alias_set (mem, set);
- set_mem_align (mem, BITS_PER_WORD);
-
- /* And finally do the dirty job! */
- emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (next_cum.sse_regno), label));
- }
-
-}
-
-/* Implement va_start. */
-
-void
-ix86_va_start (tree valist, rtx nextarg)
-{
- HOST_WIDE_INT words, n_gpr, n_fpr;
- tree f_gpr, f_fpr, f_ovf, f_sav;
- tree gpr, fpr, ovf, sav, t;
- tree type;
-
- /* Only the 64-bit target needs something special. */
- if (!TARGET_64BIT)
- {
- std_expand_builtin_va_start (valist, nextarg);
- return;
- }
-
- f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
- f_fpr = TREE_CHAIN (f_gpr);
- f_ovf = TREE_CHAIN (f_fpr);
- f_sav = TREE_CHAIN (f_ovf);
-
- valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
-
- /* Count number of gp and fp argument registers used. */
- words = current_function_args_info.words;
- n_gpr = current_function_args_info.regno;
- n_fpr = current_function_args_info.sse_regno;
-
- if (TARGET_DEBUG_ARG)
- fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
- (int) words, (int) n_gpr, (int) n_fpr);
-
- if (cfun->va_list_gpr_size)
- {
- type = TREE_TYPE (gpr);
- t = build2 (MODIFY_EXPR, type, gpr,
- build_int_cst (type, n_gpr * 8));
- TREE_SIDE_EFFECTS (t) = 1;
- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- }
-
- if (cfun->va_list_fpr_size)
- {
- type = TREE_TYPE (fpr);
- t = build2 (MODIFY_EXPR, type, fpr,
- build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
- TREE_SIDE_EFFECTS (t) = 1;
- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- }
-
- /* Find the overflow area. */
- type = TREE_TYPE (ovf);
- t = make_tree (type, virtual_incoming_args_rtx);
- if (words != 0)
- t = build2 (PLUS_EXPR, type, t,
- build_int_cst (type, words * UNITS_PER_WORD));
- t = build2 (MODIFY_EXPR, type, ovf, t);
- TREE_SIDE_EFFECTS (t) = 1;
- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
-
- if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
- {
- /* Find the register save area.
- The function prologue saves it right above the stack frame. */
- type = TREE_TYPE (sav);
- t = make_tree (type, frame_pointer_rtx);
- t = build2 (MODIFY_EXPR, type, sav, t);
- TREE_SIDE_EFFECTS (t) = 1;
- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- }
-}
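-
-/* Worked example (hypothetical prototype): for
- int f (int a, double b, ...);
- one GP and one SSE register are named, so va_start sets
- gp_offset = 1*8 = 8 and fp_offset = 8*REGPARM_MAX + 1*16 = 64, points
- ovf at the incoming stack arguments and sav at the register save area. */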
-
-/* Implement va_arg. */
-
-tree
-ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
-{
- static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
- tree f_gpr, f_fpr, f_ovf, f_sav;
- tree gpr, fpr, ovf, sav, t;
- int size, rsize;
- tree lab_false, lab_over = NULL_TREE;
- tree addr, t2;
- rtx container;
- int indirect_p = 0;
- tree ptrtype;
- enum machine_mode nat_mode;
-
- /* Only the 64-bit target needs something special. */
- if (!TARGET_64BIT)
- return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
-
- f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
- f_fpr = TREE_CHAIN (f_gpr);
- f_ovf = TREE_CHAIN (f_fpr);
- f_sav = TREE_CHAIN (f_ovf);
-
- valist = build_va_arg_indirect_ref (valist);
- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
-
- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
- if (indirect_p)
- type = build_pointer_type (type);
- size = int_size_in_bytes (type);
- rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
- nat_mode = type_natural_mode (type);
- container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
- REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
-
- /* Pull the value out of the saved registers. */
-
- addr = create_tmp_var (ptr_type_node, "addr");
- DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
-
- if (container)
- {
- int needed_intregs, needed_sseregs;
- bool need_temp;
- tree int_addr, sse_addr;
-
- lab_false = create_artificial_label ();
- lab_over = create_artificial_label ();
-
- examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
-
- need_temp = (!REG_P (container)
- && ((needed_intregs && TYPE_ALIGN (type) > 64)
- || TYPE_ALIGN (type) > 128));
-
- /* If we are passing a structure, verify that it is a consecutive block
- in the register save area. If not, we need to do moves. */
- if (!need_temp && !REG_P (container))
- {
- /* Verify that all registers are strictly consecutive. */
- if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
- {
- int i;
-
- for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
- {
- rtx slot = XVECEXP (container, 0, i);
- if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
- || INTVAL (XEXP (slot, 1)) != i * 16)
- need_temp = 1;
- }
- }
- else
- {
- int i;
-
- for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
- {
- rtx slot = XVECEXP (container, 0, i);
- if (REGNO (XEXP (slot, 0)) != (unsigned int) i
- || INTVAL (XEXP (slot, 1)) != i * 8)
- need_temp = 1;
- }
- }
- }
- if (!need_temp)
- {
- int_addr = addr;
- sse_addr = addr;
- }
- else
- {
- int_addr = create_tmp_var (ptr_type_node, "int_addr");
- DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
- sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
- DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
- }
-
- /* First ensure that we fit completely in registers. */
- if (needed_intregs)
- {
- t = build_int_cst (TREE_TYPE (gpr),
- (REGPARM_MAX - needed_intregs + 1) * 8);
- t = build2 (GE_EXPR, boolean_type_node, gpr, t);
- t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
- t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
- gimplify_and_add (t, pre_p);
- }
- if (needed_sseregs)
- {
- t = build_int_cst (TREE_TYPE (fpr),
- (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
- + REGPARM_MAX * 8);
- t = build2 (GE_EXPR, boolean_type_node, fpr, t);
- t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
- t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
- gimplify_and_add (t, pre_p);
- }
-
- /* Compute index to start of area used for integer regs. */
- if (needed_intregs)
- {
- /* int_addr = gpr + sav; */
- t = fold_convert (ptr_type_node, gpr);
- t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
- t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
- gimplify_and_add (t, pre_p);
- }
- if (needed_sseregs)
- {
- /* sse_addr = fpr + sav; */
- t = fold_convert (ptr_type_node, fpr);
- t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
- t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
- gimplify_and_add (t, pre_p);
- }
- if (need_temp)
- {
- int i;
- tree temp = create_tmp_var (type, "va_arg_tmp");
-
- /* addr = &temp; */
- t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
- t = build2 (MODIFY_EXPR, void_type_node, addr, t);
- gimplify_and_add (t, pre_p);
-
- for (i = 0; i < XVECLEN (container, 0); i++)
- {
- rtx slot = XVECEXP (container, 0, i);
- rtx reg = XEXP (slot, 0);
- enum machine_mode mode = GET_MODE (reg);
- tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
- tree addr_type = build_pointer_type (piece_type);
- tree src_addr, src;
- int src_offset;
- tree dest_addr, dest;
-
- if (SSE_REGNO_P (REGNO (reg)))
- {
- src_addr = sse_addr;
- src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
- }
- else
- {
- src_addr = int_addr;
- src_offset = REGNO (reg) * 8;
- }
- src_addr = fold_convert (addr_type, src_addr);
- src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
- size_int (src_offset)));
- src = build_va_arg_indirect_ref (src_addr);
-
- dest_addr = fold_convert (addr_type, addr);
- dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
- size_int (INTVAL (XEXP (slot, 1)))));
- dest = build_va_arg_indirect_ref (dest_addr);
-
- t = build2 (MODIFY_EXPR, void_type_node, dest, src);
- gimplify_and_add (t, pre_p);
- }
- }
-
- if (needed_intregs)
- {
- t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
- build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
- t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
- gimplify_and_add (t, pre_p);
- }
- if (needed_sseregs)
- {
- t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
- build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
- t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
- gimplify_and_add (t, pre_p);
- }
-
- t = build1 (GOTO_EXPR, void_type_node, lab_over);
- gimplify_and_add (t, pre_p);
-
- t = build1 (LABEL_EXPR, void_type_node, lab_false);
- append_to_statement_list (t, pre_p);
- }
-
- /* ... otherwise out of the overflow area. */
-
- /* Care for on-stack alignment if needed. */
- if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
- || integer_zerop (TYPE_SIZE (type)))
- t = ovf;
- else
- {
- HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
- t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
- build_int_cst (TREE_TYPE (ovf), align - 1));
- t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
- build_int_cst (TREE_TYPE (t), -align));
- }
- gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
-
- t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
- gimplify_and_add (t2, pre_p);
-
- t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
- build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
- t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
- gimplify_and_add (t, pre_p);
-
- if (container)
- {
- t = build1 (LABEL_EXPR, void_type_node, lab_over);
- append_to_statement_list (t, pre_p);
- }
-
- ptrtype = build_pointer_type (type);
- addr = fold_convert (ptrtype, addr);
-
- if (indirect_p)
- addr = build_va_arg_indirect_ref (addr);
- return build_va_arg_indirect_ref (addr);
-}
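-
-/* The GIMPLE built above behaves like this C sketch for a single integer
- argument (illustrative; the real expansion also covers SSE and
- mixed-class values):
- if (ap->gp_offset <= 5 * 8)
- {
- addr = ap->reg_save_area + ap->gp_offset;
- ap->gp_offset += 8;
- }
- else
- {
- addr = ap->overflow_arg_area;
- ap->overflow_arg_area += 8;
- }
- result = *(int *) addr; */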
-
-/* Return nonzero if OPNUM's MEM should be matched
- in movabs* patterns. */
-
-int
-ix86_check_movabs (rtx insn, int opnum)
-{
- rtx set, mem;
-
- set = PATTERN (insn);
- if (GET_CODE (set) == PARALLEL)
- set = XVECEXP (set, 0, 0);
- gcc_assert (GET_CODE (set) == SET);
- mem = XEXP (set, opnum);
- while (GET_CODE (mem) == SUBREG)
- mem = SUBREG_REG (mem);
- gcc_assert (GET_CODE (mem) == MEM);
- return (volatile_ok || !MEM_VOLATILE_P (mem));
-}
-
-/* Initialize the table of extra 80387 mathematical constants. */
-
-static void
-init_ext_80387_constants (void)
-{
- static const char * cst[5] =
- {
- "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
- "0.6931471805599453094286904741849753009", /* 1: fldln2 */
- "1.4426950408889634073876517827983434472", /* 2: fldl2e */
- "3.3219280948873623478083405569094566090", /* 3: fldl2t */
- "3.1415926535897932385128089594061862044", /* 4: fldpi */
- };
- int i;
-
- for (i = 0; i < 5; i++)
- {
- real_from_string (&ext_80387_constants_table[i], cst[i]);
- /* Ensure each constant is rounded to XFmode precision. */
- real_convert (&ext_80387_constants_table[i],
- XFmode, &ext_80387_constants_table[i]);
- }
-
- ext_80387_constants_init = 1;
-}
-
-/* Return a nonzero code if the constant is something that can be loaded
- with a special instruction (see standard_80387_constant_opcode); return
- 0 if not, or -1 if X is not a floating-point CONST_DOUBLE. */
-
-int
-standard_80387_constant_p (rtx x)
-{
- if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
- return -1;
-
- if (x == CONST0_RTX (GET_MODE (x)))
- return 1;
- if (x == CONST1_RTX (GET_MODE (x)))
- return 2;
-
- /* For XFmode constants, try to find a special 80387 instruction when
- optimizing for size or on those CPUs that benefit from them. */
- if (GET_MODE (x) == XFmode
- && (optimize_size || x86_ext_80387_constants & TUNEMASK))
- {
- REAL_VALUE_TYPE r;
- int i;
-
- if (! ext_80387_constants_init)
- init_ext_80387_constants ();
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- for (i = 0; i < 5; i++)
- if (real_identical (&r, &ext_80387_constants_table[i]))
- return i + 3;
- }
-
- return 0;
-}
-
-/* Return the opcode of the special instruction to be used to load
- the constant X. */
-
-const char *
-standard_80387_constant_opcode (rtx x)
-{
- switch (standard_80387_constant_p (x))
- {
- case 1:
- return "fldz";
- case 2:
- return "fld1";
- case 3:
- return "fldlg2";
- case 4:
- return "fldln2";
- case 5:
- return "fldl2e";
- case 6:
- return "fldl2t";
- case 7:
- return "fldpi";
- default:
- gcc_unreachable ();
- }
-}
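-
-/* Example (illustrative): for a CONST_DOUBLE holding pi in XFmode,
- standard_80387_constant_p returns 7, so this function returns "fldpi"
- and a single instruction replaces a constant-pool load. */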
-
-/* Return the CONST_DOUBLE representing the 80387 constant that is
- loaded by the specified special instruction. The argument IDX
- matches the return value from standard_80387_constant_p. */
-
-rtx
-standard_80387_constant_rtx (int idx)
-{
- int i;
-
- if (! ext_80387_constants_init)
- init_ext_80387_constants ();
-
- switch (idx)
- {
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- i = idx - 3;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
- XFmode);
-}
-
-/* Return 1 if MODE is a valid mode for SSE. */
-static int
-standard_sse_mode_p (enum machine_mode mode)
-{
- switch (mode)
- {
- case V16QImode:
- case V8HImode:
- case V4SImode:
- case V2DImode:
- case V4SFmode:
- case V2DFmode:
- return 1;
-
- default:
- return 0;
- }
-}
-
-/* Return a nonzero code if X is an FP constant that we can load into an
- SSE register without using memory. */
-int
-standard_sse_constant_p (rtx x)
-{
- enum machine_mode mode = GET_MODE (x);
-
- if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
- return 1;
- if (vector_all_ones_operand (x, mode)
- && standard_sse_mode_p (mode))
- return TARGET_SSE2 ? 2 : -1;
-
- return 0;
-}
-
-/* Return the opcode of the special instruction to be used to load
- the constant X. */
-
-const char *
-standard_sse_constant_opcode (rtx insn, rtx x)
-{
- switch (standard_sse_constant_p (x))
- {
- case 1:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else if (get_attr_mode (insn) == MODE_V2DF)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
- case 2:
- return "pcmpeqd\t%0, %0";
- }
- gcc_unreachable ();
-}
-
-/* Return 1 if OP contains a symbol reference. */
-
-int
-symbolic_reference_mentioned_p (rtx op)
-{
- const char *fmt;
- int i;
-
- if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
- return 1;
-
- fmt = GET_RTX_FORMAT (GET_CODE (op));
- for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
- {
- if (fmt[i] == 'E')
- {
- int j;
-
- for (j = XVECLEN (op, i) - 1; j >= 0; j--)
- if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
- return 1;
- }
-
- else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
- return 1;
- }
-
- return 0;
-}
-
-/* Return 1 if it is appropriate to emit `ret' instructions in the
- body of a function. Do this only if the epilogue is simple, needing a
- couple of insns. Prior to reloading, we can't tell how many registers
- must be saved, so return 0 then. Return 0 if there is no frame
- marker to de-allocate. */
-
-int
-ix86_can_use_return_insn_p (void)
-{
- struct ix86_frame frame;
-
- if (! reload_completed || frame_pointer_needed)
- return 0;
-
- /* Don't allow more than 32k bytes of args to be popped, since that's
- all we can do with one instruction. */
- if (current_function_pops_args
- && current_function_args_size >= 32768)
- return 0;
-
- ix86_compute_frame_layout (&frame);
- return frame.to_allocate == 0 && frame.nregs == 0;
-}
-
-/* Value should be nonzero if functions must have frame pointers.
- Zero means the frame pointer need not be set up (and parms may
- be accessed via the stack pointer) in functions that seem suitable. */
-
-int
-ix86_frame_pointer_required (void)
-{
- /* If we accessed previous frames, then the generated code expects
- to be able to access the saved ebp value in our frame. */
- if (cfun->machine->accesses_prev_frame)
- return 1;
-
- /* Several x86 OSes need a frame pointer for other reasons,
- usually pertaining to setjmp. */
- if (SUBTARGET_FRAME_POINTER_REQUIRED)
- return 1;
-
- /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
- the frame pointer by default. Turn it back on now if we've not
- got a leaf function. */
- if (TARGET_OMIT_LEAF_FRAME_POINTER
- && (!current_function_is_leaf
- || ix86_current_function_calls_tls_descriptor))
- return 1;
-
- if (current_function_profile)
- return 1;
-
- return 0;
-}
-
-/* Record that the current function accesses previous call frames. */
-
-void
-ix86_setup_frame_addresses (void)
-{
- cfun->machine->accesses_prev_frame = 1;
-}
-
-#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
-# define USE_HIDDEN_LINKONCE 1
-#else
-# define USE_HIDDEN_LINKONCE 0
-#endif
-
-/* APPLE LOCAL 5695218 */
-static GTY(()) int pic_labels_used;
-
-/* Fills in the label name that should be used for a pc thunk for
- the given register. */
-
-static void
-get_pc_thunk_name (char name[32], unsigned int regno)
-{
- gcc_assert (!TARGET_64BIT);
-
- /* APPLE LOCAL deep branch prediction pic-base. */
- if (USE_HIDDEN_LINKONCE || TARGET_MACHO)
- sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
- else
- ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
-}
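-
-/* Examples (illustrative): regno 3 (%ebx) yields
- "__i686.get_pc_thunk.bx" in the hidden-linkonce and Mach-O cases, or an
- internal label of the form "LPR3" otherwise. */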
-
-
-/* For -fpic, emit each pc thunk that was used: a small function that
- loads its register with the return address of the caller and returns. */
-
-void
-ix86_file_end (void)
-{
- rtx xops[2];
- int regno;
-
- for (regno = 0; regno < 8; ++regno)
- {
- char name[32];
-
- if (! ((pic_labels_used >> regno) & 1))
- continue;
-
- get_pc_thunk_name (name, regno);
-
-#if TARGET_MACHO
- if (TARGET_MACHO)
- {
- switch_to_section (darwin_sections[text_coal_section]);
- fputs ("\t.weak_definition\t", asm_out_file);
- assemble_name (asm_out_file, name);
- fputs ("\n\t.private_extern\t", asm_out_file);
- assemble_name (asm_out_file, name);
- fputs ("\n", asm_out_file);
- ASM_OUTPUT_LABEL (asm_out_file, name);
- }
- else
-#endif
- if (USE_HIDDEN_LINKONCE)
- {
- tree decl;
-
- decl = build_decl (FUNCTION_DECL, get_identifier (name),
- error_mark_node);
- TREE_PUBLIC (decl) = 1;
- TREE_STATIC (decl) = 1;
- DECL_ONE_ONLY (decl) = 1;
-
- (*targetm.asm_out.unique_section) (decl, 0);
- switch_to_section (get_named_section (decl, NULL, 0));
-
- (*targetm.asm_out.globalize_label) (asm_out_file, name);
- fputs ("\t.hidden\t", asm_out_file);
- assemble_name (asm_out_file, name);
- fputc ('\n', asm_out_file);
- ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
- }
- /* APPLE LOCAL begin deep branch prediction pic-base */
-#if TARGET_MACHO
- else if (TARGET_MACHO)
- {
- switch_to_section (darwin_sections[text_coal_section]);
- fputs (".weak_definition\t", asm_out_file);
- assemble_name (asm_out_file, name);
- fputs ("\n.private_extern\t", asm_out_file);
- assemble_name (asm_out_file, name);
- fputs ("\n", asm_out_file);
- ASM_OUTPUT_LABEL (asm_out_file, name);
- }
-#endif
- /* APPLE LOCAL end deep branch prediction pic-base */
- else
- {
- switch_to_section (text_section);
- ASM_OUTPUT_LABEL (asm_out_file, name);
- }
-
- xops[0] = gen_rtx_REG (SImode, regno);
- xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
- output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
- output_asm_insn ("ret", xops);
- }
-
- if (NEED_INDICATE_EXEC_STACK)
- file_end_indicate_exec_stack ();
-}
-
-/* Emit code for the SET_GOT patterns. */
-
-const char *
-output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
-{
- rtx xops[3];
-
- xops[0] = dest;
- xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
-
- if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
- {
- xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
-
- if (!flag_pic)
- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
- else
- /* APPLE LOCAL begin dwarf call/pop 5221468 */
- {
- output_asm_insn ("call\t%a2", xops);
-
- /* If necessary, report the effect that the instruction has on
- the unwind info. */
-#if defined (DWARF2_UNWIND_INFO)
- if (flag_asynchronous_unwind_tables
-#if !defined (HAVE_prologue)
- && !ACCUMULATE_OUTGOING_ARGS
-#endif
- && dwarf2out_do_frame ())
- {
- rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (-4)));
- insn = make_insn_raw (insn);
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- }
-#endif
- }
- /* APPLE LOCAL end dwarf call/pop 5221468 */
-
-#if TARGET_MACHO
- /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
- is what will be referenced by the Mach-O PIC subsystem. */
- if (!label)
- ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
-#endif
-
- (*targetm.asm_out.internal_label) (asm_out_file, "L",
- CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
-
- if (flag_pic)
- /* APPLE LOCAL begin dwarf call/pop 5221468 */
- {
- output_asm_insn ("pop{l}\t%0", xops);
-
- /* If necessary, report the effect that the instruction has on
- the unwind info. We've already done this for delay slots
- and call instructions. */
-#if defined (DWARF2_UNWIND_INFO)
- if (flag_asynchronous_unwind_tables
-#if !defined (HAVE_prologue)
- && !ACCUMULATE_OUTGOING_ARGS
-#endif
- && dwarf2out_do_frame ())
- {
- rtx insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (4)));
- insn = make_insn_raw (insn);
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- }
-#endif
- }
- /* APPLE LOCAL end dwarf call/pop 5221468 */
- }
- else
- {
- char name[32];
- get_pc_thunk_name (name, REGNO (dest));
- pic_labels_used |= 1 << REGNO (dest);
-
- xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
- xops[2] = gen_rtx_MEM (QImode, xops[2]);
- output_asm_insn ("call\t%X2", xops);
- /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
- is what will be referenced by the Mach-O PIC subsystem. */
-#if TARGET_MACHO
- if (!label)
- ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
- else
- targetm.asm_out.internal_label (asm_out_file, "L",
- CODE_LABEL_NUMBER (label));
-#endif
- }
-
- if (TARGET_MACHO)
- return "";
-
- if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
- output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
-
- return "";
-}
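-
-/* A hedged sketch of what the non-deep-branch-prediction, non-Mach-O
-   PIC path above emits (label name illustrative):
-
-	call	.L2
-   .L2:	popl	%ebx
-	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
-
-   The call/pop pair materializes the address of .L2 in %ebx, and the
-   add rebases it to the GOT.  */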
-
-/* Generate a "push" pattern for input ARG.  */
-
-static rtx
-gen_push (rtx arg)
-{
- return gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (Pmode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx)),
- arg);
-}
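-
-/* For illustration (a sketch, not something this function emits itself):
-   on 32-bit targets, gen_push (gen_rtx_REG (Pmode, 0)) builds RTL of the
-   shape (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax)), which the
-   backend renders as "push %eax".  */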
-
-/* Return the number of an unused call-clobbered register if one is
-   available for the entire function, else INVALID_REGNUM.  */
-
-static unsigned int
-ix86_select_alt_pic_regnum (void)
-{
- if (current_function_is_leaf && !current_function_profile
- && !ix86_current_function_calls_tls_descriptor)
- {
- int i;
- for (i = 2; i >= 0; --i)
- if (!regs_ever_live[i])
- return i;
- }
-
- return INVALID_REGNUM;
-}
-
-/* APPLE LOCAL begin 5695218 */
-/* Reload may introduce references to the PIC base register
- that do not directly reference pic_offset_table_rtx.
- In the rare event we choose an alternate PIC register,
- walk all the insns and rewrite every reference. */
-/* Run through the insns, changing references to the original
- PIC_OFFSET_TABLE_REGNUM to our new one. */
-static void
-ix86_globally_replace_pic_reg (unsigned int new_pic_regno)
-{
- rtx insn;
- const int nregs = PIC_OFFSET_TABLE_REGNUM + 1;
- rtx reg_map[FIRST_PSEUDO_REGISTER];
- memset (reg_map, 0, nregs * sizeof (rtx));
- pic_offset_table_rtx = gen_rtx_REG (SImode, new_pic_regno);
- reg_map[REAL_PIC_OFFSET_TABLE_REGNUM] = pic_offset_table_rtx;
-
- push_topmost_sequence ();
- for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn))
- {
- if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
- {
- replace_regs (PATTERN (insn), reg_map, nregs, 1);
- replace_regs (REG_NOTES (insn), reg_map, nregs, 1);
- }
-#if defined (TARGET_TOC)
- else if (GET_CODE (insn) == CALL_INSN)
- {
- if ( !SIBLING_CALL_P (insn))
- abort ();
- }
-#endif
- }
- pop_topmost_sequence ();
-
- regs_ever_live[new_pic_regno] = 1;
- regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 0;
-#if defined (TARGET_TOC)
- cfun->machine->substitute_pic_base_reg = new_pic_regno;
-#endif
-}
-/* APPLE LOCAL end 5695218 */
-
-/* Return 1 if we need to save REGNO. */
-static int
-ix86_save_reg (unsigned int regno, int maybe_eh_return)
-{
- /* APPLE LOCAL begin CW asm blocks */
-  /* For an asm function, we don't save any registers; instead, the
-     user is responsible for them.  */
- if (cfun->iasm_asm_function)
- return 0;
- /* APPLE LOCAL end CW asm blocks */
-
- if (pic_offset_table_rtx
- && regno == REAL_PIC_OFFSET_TABLE_REGNUM
- /* APPLE LOCAL begin 5695218 */
- && (current_function_uses_pic_offset_table
- || current_function_profile
- || current_function_calls_eh_return
- || current_function_uses_const_pool))
- /* APPLE LOCAL end 5695218 */
- {
- if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
- return 0;
- return 1;
- }
-
- if (current_function_calls_eh_return && maybe_eh_return)
- {
- unsigned i;
- for (i = 0; ; i++)
- {
- unsigned test = EH_RETURN_DATA_REGNO (i);
- if (test == INVALID_REGNUM)
- break;
- if (test == regno)
- return 1;
- }
- }
-
- if (cfun->machine->force_align_arg_pointer
- && regno == REGNO (cfun->machine->force_align_arg_pointer))
- return 1;
-
- /* APPLE LOCAL begin 5695218 */
- /* In order to get accurate usage info for the PIC register, we've
- been forced to break and un-break the call_used_regs and
- fixed_regs vectors. Ignore them when considering the PIC
- register. */
- if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
- && regs_ever_live[regno])
- return 1;
- /* APPLE LOCAL end 5695218 */
-
- return (regs_ever_live[regno]
- && !call_used_regs[regno]
- && !fixed_regs[regno]
- && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
-}
-
-/* Return number of registers to be saved on the stack. */
-
-static int
-ix86_nsaved_regs (void)
-{
- int nregs = 0;
- int regno;
-
- for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
- if (ix86_save_reg (regno, true))
- nregs++;
- return nregs;
-}
-
-/* Return the offset between two registers, one to be eliminated, and the other
- its replacement, at the start of a routine. */
-
-HOST_WIDE_INT
-ix86_initial_elimination_offset (int from, int to)
-{
- struct ix86_frame frame;
- ix86_compute_frame_layout (&frame);
-
- if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
- return frame.hard_frame_pointer_offset;
- else if (from == FRAME_POINTER_REGNUM
- && to == HARD_FRAME_POINTER_REGNUM)
- return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
- else
- {
- gcc_assert (to == STACK_POINTER_REGNUM);
-
- if (from == ARG_POINTER_REGNUM)
- return frame.stack_pointer_offset;
-
- gcc_assert (from == FRAME_POINTER_REGNUM);
- return frame.stack_pointer_offset - frame.frame_pointer_offset;
- }
-}
-
-/* Fill structure ix86_frame about frame of currently computed function. */
-
-static void
-ix86_compute_frame_layout (struct ix86_frame *frame)
-{
- HOST_WIDE_INT total_size;
- unsigned int stack_alignment_needed;
- HOST_WIDE_INT offset;
- unsigned int preferred_alignment;
- HOST_WIDE_INT size = get_frame_size ();
-
- frame->nregs = ix86_nsaved_regs ();
- total_size = size;
-
- stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
- preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
-
-  /* During reload iteration the number of registers saved can change.
-     Recompute the value as needed.  Do not recompute when the number of
-     registers didn't change, as reload makes multiple calls to the
-     function and does not expect the decision to change within a single
-     iteration.  */
- if (!optimize_size
- && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
- {
- int count = frame->nregs;
-
- cfun->machine->use_fast_prologue_epilogue_nregs = count;
-      /* The fast prologue uses move instead of push to save registers.  This
-         is significantly longer, but also executes faster, as modern hardware
-         can execute the moves in parallel but can't do that for push/pop.
-
-	 Be careful about choosing which prologue to emit:  when the function
-	 takes many instructions to execute, we may use the slow version, as
-	 we may when the function is known to be outside a hot spot (this is
-	 known with feedback only).  Weight the size of the function by the
-	 number of registers to save, as it is cheap to use one or two push
-	 instructions but very slow to use many of them.  */
- if (count)
- count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
- if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
- || (flag_branch_probabilities
- && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
- cfun->machine->use_fast_prologue_epilogue = false;
- else
- cfun->machine->use_fast_prologue_epilogue
- = !expensive_function_p (count);
- }
- if (TARGET_PROLOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue)
- frame->save_regs_using_mov = true;
- else
- frame->save_regs_using_mov = false;
-
-
- /* Skip return address and saved base pointer. */
- offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
-
- frame->hard_frame_pointer_offset = offset;
-
-  /* Do some sanity checking of stack_alignment_needed and
-     preferred_alignment, since the i386 port is the only one using these
-     features, which may break easily.  */
-
- gcc_assert (!size || stack_alignment_needed);
- gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
- gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
- gcc_assert (stack_alignment_needed
- <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
-
- if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
- stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
-
- /* Register save area */
- offset += frame->nregs * UNITS_PER_WORD;
-
- /* Va-arg area */
- if (ix86_save_varrargs_registers)
- {
- offset += X86_64_VARARGS_SIZE;
- frame->va_arg_size = X86_64_VARARGS_SIZE;
- }
- else
- frame->va_arg_size = 0;
-
- /* Align start of frame for local function. */
- frame->padding1 = ((offset + stack_alignment_needed - 1)
- & -stack_alignment_needed) - offset;
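-  /* Worked example: with offset == 20 and stack_alignment_needed == 16,
-     padding1 == ((20 + 15) & -16) - 20 == 32 - 20 == 12 bytes.  */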
-
- offset += frame->padding1;
-
- /* Frame pointer points here. */
- frame->frame_pointer_offset = offset;
-
- offset += size;
-
-  /* Add the outgoing arguments area.  This can be skipped if we
-     eliminated all the function calls as dead code.
-     Skipping is, however, impossible when the function calls alloca:
-     the alloca expander assumes that the last
-     current_function_outgoing_args_size bytes of the stack frame are
-     unused.  */
- if (ACCUMULATE_OUTGOING_ARGS
- && (!current_function_is_leaf || current_function_calls_alloca
- || ix86_current_function_calls_tls_descriptor))
- {
- offset += current_function_outgoing_args_size;
- frame->outgoing_arguments_size = current_function_outgoing_args_size;
- }
- else
- frame->outgoing_arguments_size = 0;
-
- /* Align stack boundary. Only needed if we're calling another function
- or using alloca. */
- if (!current_function_is_leaf || current_function_calls_alloca
- || ix86_current_function_calls_tls_descriptor)
- frame->padding2 = ((offset + preferred_alignment - 1)
- & -preferred_alignment) - offset;
- else
- frame->padding2 = 0;
-
- offset += frame->padding2;
-
-  /* We've reached the end of the stack frame.  */
- frame->stack_pointer_offset = offset;
-
-  /* The size the prologue needs to allocate.  */
- frame->to_allocate =
- (size + frame->padding1 + frame->padding2
- + frame->outgoing_arguments_size + frame->va_arg_size);
-
- if ((!frame->to_allocate && frame->nregs <= 1)
- || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
- frame->save_regs_using_mov = false;
-
- if (TARGET_RED_ZONE && current_function_sp_is_unchanging
- && current_function_is_leaf
- && !ix86_current_function_calls_tls_descriptor)
- {
- frame->red_zone_size = frame->to_allocate;
- if (frame->save_regs_using_mov)
- frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
- if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
- frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
- }
- else
- frame->red_zone_size = 0;
- frame->to_allocate -= frame->red_zone_size;
- frame->stack_pointer_offset -= frame->red_zone_size;
-#if 0
- fprintf (stderr, "nregs: %i\n", frame->nregs);
- fprintf (stderr, "size: %i\n", size);
- fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
- fprintf (stderr, "padding1: %i\n", frame->padding1);
- fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
- fprintf (stderr, "padding2: %i\n", frame->padding2);
- fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
- fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
- fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
- fprintf (stderr, "hard_frame_pointer_offset: %i\n",
- frame->hard_frame_pointer_offset);
- fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
-#endif
-}
-
-/* Emit code to save registers in the prologue. */
-
-static void
-ix86_emit_save_regs (void)
-{
- unsigned int regno;
- rtx insn;
-
- for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
- if (ix86_save_reg (regno, true))
- {
- insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
-}
-
-/* Emit code to save registers using MOV insns.  The first register
-   is saved at POINTER + OFFSET.  */
-static void
-ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
-{
- unsigned int regno;
- rtx insn;
-
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, true))
- {
- insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
- Pmode, offset),
- gen_rtx_REG (Pmode, regno));
- RTX_FRAME_RELATED_P (insn) = 1;
- offset += UNITS_PER_WORD;
- }
-}
-
-/* Expand prologue or epilogue stack adjustment.
-   The pattern exists to put a dependency on all ebp-based memory accesses.
-   STYLE should be negative if instructions should be marked as frame
-   related, zero if the %r11 register is live and cannot be freely used,
-   and positive otherwise.  */
-
-static void
-pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
-{
- rtx insn;
-
- if (! TARGET_64BIT)
- insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
- else if (x86_64_immediate_operand (offset, DImode))
- insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
- else
- {
- rtx r11;
-      /* r11 is used by indirect sibcall return as well, set before the
-	 epilogue and used after the epilogue.  At the moment, indirect
-	 sibcalls shouldn't be used together with huge frame sizes in one
-	 function because of the frame_size check in sibcall.c.  */
- gcc_assert (style);
- r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
- insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
- /* APPLE LOCAL async unwind info 5949469 */
- if (style < 0 /* || flag_asynchronous_unwind_tables*/)
- RTX_FRAME_RELATED_P (insn) = 1;
- insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
- offset));
- }
- if (style < 0)
- RTX_FRAME_RELATED_P (insn) = 1;
- /* APPLE LOCAL begin async unwind info 5949350 5949469 */
-#if 0
- else if (flag_asynchronous_unwind_tables
- && (src == hard_frame_pointer_rtx
- || src == stack_pointer_rtx))
- RTX_FRAME_RELATED_P (insn) = 1;
-#endif
- /* APPLE LOCAL end async unwind info 5949350 5949469 */
-}
-
-/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
-
-static rtx
-ix86_internal_arg_pointer (void)
-{
- bool has_force_align_arg_pointer =
- (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
- TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
- if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
- && DECL_NAME (current_function_decl)
- && MAIN_NAME_P (DECL_NAME (current_function_decl))
- && DECL_FILE_SCOPE_P (current_function_decl))
- || ix86_force_align_arg_pointer
- || has_force_align_arg_pointer)
- {
- /* Nested functions can't realign the stack due to a register
- conflict. */
- if (DECL_CONTEXT (current_function_decl)
- && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
- {
- if (ix86_force_align_arg_pointer)
- warning (0, "-mstackrealign ignored for nested functions");
- if (has_force_align_arg_pointer)
- error ("%s not supported for nested functions",
- ix86_force_align_arg_pointer_string);
- return virtual_incoming_args_rtx;
- }
- cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
- return copy_to_reg (cfun->machine->force_align_arg_pointer);
- }
- else
- return virtual_incoming_args_rtx;
-}
-
-/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
- This is called from dwarf2out.c to emit call frame instructions
- for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
-static void
-ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
-{
- rtx unspec = SET_SRC (pattern);
- gcc_assert (GET_CODE (unspec) == UNSPEC);
-
- switch (index)
- {
- case UNSPEC_REG_SAVE:
- dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
- SET_DEST (pattern));
- break;
- case UNSPEC_DEF_CFA:
- dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
- INTVAL (XVECEXP (unspec, 0, 0)));
- break;
- default:
- gcc_unreachable ();
- }
-}
-
-/* APPLE LOCAL begin 3399553 */
-/* Calculate the value of FLT_ROUNDS into DEST.
-
- The rounding mode is in bits 11:10 of FPSR, and has the following
- settings:
- 00 Round to nearest
- 01 Round to -inf
- 10 Round to +inf
- 11 Round to 0
-
- FLT_ROUNDS, on the other hand, expects the following:
- -1 Undefined
- 0 Round to 0
- 1 Round to nearest
- 2 Round to +inf
- 3 Round to -inf
-
- To perform the conversion, we do:
- (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
-*/
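-/* Spot-checking the formula above for each mode (a worked example, not
-   additional behavior): bits 11:10 == 00 gives ((0 | 0) + 1) & 3 == 1
-   (nearest); 01 gives ((0 | 2) + 1) & 3 == 3 (-inf); 10 gives
-   ((1 | 0) + 1) & 3 == 2 (+inf); 11 gives ((1 | 2) + 1) & 3 == 0
-   (to zero).  */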
-void
-ix86_expand_flt_rounds (rtx dest)
-{
- rtx mem = assign_stack_temp (HImode, GET_MODE_SIZE (HImode), 0);
- rtx temp = gen_reg_rtx (SImode);
-
- /* Step #1: Read FPSR. Unfortunately, this can only be done into a
- 16-bit memory location. */
- emit_insn (gen_x86_fnstcw_1 (mem));
-
- /* Step #2: Copy into a register. */
- emit_insn (gen_zero_extendhisi2 (dest, mem));
-
- /* Step #3: Perform conversion described above. */
- emit_insn (gen_andsi3 (temp, dest, GEN_INT (0x400)));
- emit_insn (gen_andsi3 (dest, dest, GEN_INT (0x800)));
- emit_insn (gen_lshrsi3 (temp, temp, GEN_INT (9)));
- emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (11)));
- emit_insn (gen_iorsi3 (dest, dest, temp));
- emit_insn (gen_addsi3 (dest, dest, const1_rtx));
- emit_insn (gen_andsi3 (dest, dest, GEN_INT (3)));
-}
-/* APPLE LOCAL end 3399553 */
-
-/* APPLE LOCAL begin fix-and-continue x86 */
-#ifndef TARGET_FIX_AND_CONTINUE
-#define TARGET_FIX_AND_CONTINUE 0
-#endif
-/* APPLE LOCAL end fix-and-continue x86 */
-
-/* Expand the prologue into a bunch of separate insns. */
-
-void
-ix86_expand_prologue (void)
-{
- rtx insn;
- bool pic_reg_used;
- struct ix86_frame frame;
- HOST_WIDE_INT allocate;
-
- /* APPLE LOCAL begin fix-and-continue x86 */
- if (TARGET_FIX_AND_CONTINUE)
- {
- /* gdb on darwin arranges to forward a function from the old
- address by modifying the first 6 instructions of the function
- to branch to the overriding function. This is necessary to
- permit function pointers that point to the old function to
- actually forward to the new function. */
- emit_insn (gen_nop ());
- emit_insn (gen_nop ());
- emit_insn (gen_nop ());
- emit_insn (gen_nop ());
- emit_insn (gen_nop ());
- emit_insn (gen_nop ());
- }
- /* APPLE LOCAL end fix-and-continue x86 */
-
- ix86_compute_frame_layout (&frame);
-
- if (cfun->machine->force_align_arg_pointer)
- {
- rtx x, y;
-
- /* Grab the argument pointer. */
- x = plus_constant (stack_pointer_rtx, 4);
- y = cfun->machine->force_align_arg_pointer;
- insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
- RTX_FRAME_RELATED_P (insn) = 1;
-
-      /* The unwind info consists of two parts: install the fafp as the cfa,
-	 and record the fafp as the "save register" of the stack pointer.
-	 The latter is there so that the unwinder can see where it should
-	 restore the stack pointer across the and insn.  */
- x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
- x = gen_rtx_SET (VOIDmode, y, x);
- RTX_FRAME_RELATED_P (x) = 1;
- y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
- UNSPEC_REG_SAVE);
- y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
- RTX_FRAME_RELATED_P (y) = 1;
- x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
- x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
- REG_NOTES (insn) = x;
-
- /* Align the stack. */
- emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-16)));
-
- /* And here we cheat like madmen with the unwind info. We force the
- cfa register back to sp+4, which is exactly what it was at the
- start of the function. Re-pushing the return address results in
- the return at the same spot relative to the cfa, and thus is
- correct wrt the unwind info. */
- x = cfun->machine->force_align_arg_pointer;
- x = gen_frame_mem (Pmode, plus_constant (x, -4));
- insn = emit_insn (gen_push (x));
- RTX_FRAME_RELATED_P (insn) = 1;
-
- x = GEN_INT (4);
- x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
- x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
- x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
- REG_NOTES (insn) = x;
- }
-
- /* Note: AT&T enter does NOT have reversed args. Enter is probably
- slower on all targets. Also sdb doesn't like it. */
-
- if (frame_pointer_needed)
- {
- insn = emit_insn (gen_push (hard_frame_pointer_rtx));
- RTX_FRAME_RELATED_P (insn) = 1;
-
- insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
- RTX_FRAME_RELATED_P (insn) = 1;
- }
-
- allocate = frame.to_allocate;
-
- if (!frame.save_regs_using_mov)
- ix86_emit_save_regs ();
- else
- allocate += frame.nregs * UNITS_PER_WORD;
-
-  /* When using the red zone, we may start saving registers before
-     allocating the stack frame, saving one cycle of the prologue.  */
- if (TARGET_RED_ZONE && frame.save_regs_using_mov)
- ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
- : stack_pointer_rtx,
- -frame.nregs * UNITS_PER_WORD);
-
- if (allocate == 0)
- ;
- else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
- /* APPLE LOCAL begin CW asm blocks */
- {
- if (! cfun->iasm_asm_function)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-allocate), -1);
- }
- /* APPLE LOCAL end CW asm blocks */
- else
- {
- /* Only valid for Win32. */
- rtx eax = gen_rtx_REG (SImode, 0);
- bool eax_live = ix86_eax_live_at_start_p ();
- rtx t;
-
- gcc_assert (!TARGET_64BIT);
-
- if (eax_live)
- {
- emit_insn (gen_push (eax));
- allocate -= 4;
- }
-
- emit_move_insn (eax, GEN_INT (allocate));
-
- insn = emit_insn (gen_allocate_stack_worker (eax));
- RTX_FRAME_RELATED_P (insn) = 1;
- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
- t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- t, REG_NOTES (insn));
-
- if (eax_live)
- {
- if (frame_pointer_needed)
- t = plus_constant (hard_frame_pointer_rtx,
- allocate
- - frame.to_allocate
- - frame.nregs * UNITS_PER_WORD);
- else
- t = plus_constant (stack_pointer_rtx, allocate);
- emit_move_insn (eax, gen_rtx_MEM (SImode, t));
- }
- }
-
- if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
- {
- if (!frame_pointer_needed || !frame.to_allocate)
- ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
- else
- ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
- -frame.nregs * UNITS_PER_WORD);
- }
-
- pic_reg_used = false;
- /* APPLE LOCAL begin 5695218 */
- if (pic_offset_table_rtx && regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
- && !TARGET_64BIT)
- {
- unsigned int alt_pic_reg_used;
-
- alt_pic_reg_used = ix86_select_alt_pic_regnum ();
- /* APPLE LOCAL end 5695218 */
-
- if (alt_pic_reg_used != INVALID_REGNUM)
- /* APPLE LOCAL begin 5695218 */
- /* REGNO (pic_offset_table_rtx) = alt_pic_reg_used; */
- ix86_globally_replace_pic_reg (alt_pic_reg_used);
- /* APPLE LOCAL end 5695218 */
-
- pic_reg_used = true;
- }
-
- if (pic_reg_used)
- {
- if (TARGET_64BIT)
- insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
- else
- insn = emit_insn (gen_set_got (pic_offset_table_rtx));
-
-      /* Even with accurate pre-reload life analysis, we can wind up
-	 deleting all references to the pic register after reload.
-	 Consider the case where cross-jumping unifies two sides of a
-	 branch controlled by a comparison vs the only read from a global.
-	 In that case, allow the set_got to be deleted, though we're
-	 too late to do anything about the ebx save in the prologue.  */
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
- }
-
-  /* Prevent function calls from being scheduled before the call to mcount.
-     In the pic_reg_used case, make sure that the got load isn't deleted.  */
- if (current_function_profile)
- emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
-}
-
-/* Emit code to restore saved registers using MOV insns. First register
- is restored from POINTER + OFFSET. */
-static void
-ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
- int maybe_eh_return)
-{
- int regno;
- rtx base_address = gen_rtx_MEM (Pmode, pointer);
-
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, maybe_eh_return))
- {
-	/* Ensure that adjust_address won't be forced to produce a pointer
-	   out of the range allowed by the x86-64 instruction set.  */
- if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
- {
- rtx r11;
-
- r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
- emit_move_insn (r11, GEN_INT (offset));
- emit_insn (gen_adddi3 (r11, r11, pointer));
- base_address = gen_rtx_MEM (Pmode, r11);
- offset = 0;
- }
- emit_move_insn (gen_rtx_REG (Pmode, regno),
- adjust_address (base_address, Pmode, offset));
- offset += UNITS_PER_WORD;
- }
-}
-
-/* Restore function stack, frame, and registers. */
-
-void
-ix86_expand_epilogue (int style)
-{
- int regno;
- int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
- struct ix86_frame frame;
- HOST_WIDE_INT offset;
-
- ix86_compute_frame_layout (&frame);
-
-  /* Calculate the start of the saved registers relative to ebp.  Special care
- must be taken for the normal return case of a function using
- eh_return: the eax and edx registers are marked as saved, but not
- restored along this path. */
- offset = frame.nregs;
- if (current_function_calls_eh_return && style != 2)
- offset -= 2;
- offset *= -UNITS_PER_WORD;
-
- /* APPLE LOCAL begin CW asm blocks */
- /* For an asm function, don't generate an epilogue. */
- if (cfun->iasm_asm_function)
- {
- emit_jump_insn (gen_return_internal ());
- return;
- }
- /* APPLE LOCAL end CW asm blocks */
-
-  /* If we're only restoring one register and sp is not valid, then
-     use a move instruction to restore the register, since it's
-     less work than reloading sp and popping the register.
-
-     The default code results in a stack adjustment using an add/lea
-     instruction, while this code results in a LEAVE instruction (or
-     discrete equivalent), so it is profitable in some other cases as
-     well, especially when there are no registers to restore.  We also
-     use this code when TARGET_USE_LEAVE is set and there is exactly one
-     register to pop.  This heuristic may need some tuning in the
-     future.  */
- if ((!sp_valid && frame.nregs <= 1)
- || (TARGET_EPILOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs > 1 || frame.to_allocate))
- || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
- || (frame_pointer_needed && TARGET_USE_LEAVE
- && cfun->machine->use_fast_prologue_epilogue
- && frame.nregs == 1)
- || current_function_calls_eh_return)
- {
-      /* Restore registers.  We can use ebp or esp to address the memory
-	 locations.  If both are available, default to ebp, since offsets
-	 are known to be small.  The only exception is esp pointing directly
-	 to the end of the block of saved registers, where we may simplify
-	 the addressing mode.  */
-
- if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
- ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate, style == 2);
- else
- ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
- offset, style == 2);
-
- /* eh_return epilogues need %ecx added to the stack pointer. */
- if (style == 2)
- {
- rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
-
- if (frame_pointer_needed)
- {
- tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
- tmp = plus_constant (tmp, UNITS_PER_WORD);
- emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
-
- tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
- emit_move_insn (hard_frame_pointer_rtx, tmp);
-
- pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
- const0_rtx, style);
- }
- else
- {
- tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
- tmp = plus_constant (tmp, (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD));
- emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
- }
- }
- else if (!frame_pointer_needed)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD),
- style);
- /* If not an i386, mov & pop is faster than "leave". */
- else if (TARGET_USE_LEAVE || optimize_size
- || !cfun->machine->use_fast_prologue_epilogue)
- emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
- else
- {
- pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- const0_rtx, style);
- if (TARGET_64BIT)
- emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
- else
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
- }
- }
- else
- {
- /* First step is to deallocate the stack frame so that we can
- pop the registers. */
- if (!sp_valid)
- {
- gcc_assert (frame_pointer_needed);
- pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- GEN_INT (offset), style);
- }
- else if (frame.to_allocate)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.to_allocate), style);
-
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, false))
- {
- if (TARGET_64BIT)
- emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
- else
- emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
- }
- if (frame_pointer_needed)
- {
- /* Leave results in shorter dependency chains on CPUs that are
- able to grok it fast. */
- if (TARGET_USE_LEAVE)
- emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
- else if (TARGET_64BIT)
- emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
- else
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
- }
- }
-
- if (cfun->machine->force_align_arg_pointer)
- {
- emit_insn (gen_addsi3 (stack_pointer_rtx,
- cfun->machine->force_align_arg_pointer,
- GEN_INT (-4)));
- }
-
- /* Sibcall epilogues don't want a return instruction. */
- if (style == 0)
- return;
-
- if (current_function_pops_args && current_function_args_size)
- {
- rtx popc = GEN_INT (current_function_pops_args);
-
-      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
-	 return address, do an explicit add, and jump indirectly to the
-	 caller.  */
-
- if (current_function_pops_args >= 65536)
- {
- rtx ecx = gen_rtx_REG (SImode, 2);
-
- /* There is no "pascal" calling convention in 64bit ABI. */
- gcc_assert (!TARGET_64BIT);
-
- emit_insn (gen_popsi1 (ecx));
- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
- emit_jump_insn (gen_return_indirect_internal (ecx));
- }
- else
- emit_jump_insn (gen_return_pop_internal (popc));
- }
- else
- emit_jump_insn (gen_return_internal ());
-}
-
-/* Reset state that the function's compilation may have modified, such
-   as the hard register backing pic_offset_table_rtx.  */
-
-static void
-ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
- HOST_WIDE_INT size ATTRIBUTE_UNUSED)
-{
- if (pic_offset_table_rtx)
- REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
-#if TARGET_MACHO
- /* Mach-O doesn't support labels at the end of objects, so if
- it looks like we might want one, insert a NOP. */
- {
- rtx insn = get_last_insn ();
- while (insn
- && NOTE_P (insn)
- && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
- insn = PREV_INSN (insn);
- if (insn
- && (LABEL_P (insn)
- || (NOTE_P (insn)
- && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
- fputs ("\tnop\n", file);
- }
-#endif
-
-}
-
-/* Extract the parts of an RTL expression that is a valid memory address
-   for an instruction.  Return 0 if the structure of the address is
-   grossly off.  Return -1 if the address contains ASHIFT, so it is not
-   strictly valid, but is still used for computing the length of the lea
-   instruction.  */
-
-int
-ix86_decompose_address (rtx addr, struct ix86_address *out)
-{
- rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
- rtx base_reg, index_reg;
- HOST_WIDE_INT scale = 1;
- rtx scale_rtx = NULL_RTX;
- int retval = 1;
- enum ix86_address_seg seg = SEG_DEFAULT;
-
- if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
- base = addr;
- else if (GET_CODE (addr) == PLUS)
- {
- rtx addends[4], op;
- int n = 0, i;
-
- op = addr;
- do
- {
- if (n >= 4)
- return 0;
- addends[n++] = XEXP (op, 1);
- op = XEXP (op, 0);
- }
- while (GET_CODE (op) == PLUS);
- if (n >= 4)
- return 0;
- addends[n] = op;
-
- for (i = n; i >= 0; --i)
- {
- op = addends[i];
- switch (GET_CODE (op))
- {
- case MULT:
- if (index)
- return 0;
- index = XEXP (op, 0);
- scale_rtx = XEXP (op, 1);
- break;
-
- case UNSPEC:
- if (XINT (op, 1) == UNSPEC_TP
- && TARGET_TLS_DIRECT_SEG_REFS
- && seg == SEG_DEFAULT)
- seg = TARGET_64BIT ? SEG_FS : SEG_GS;
- else
- return 0;
- break;
-
- case REG:
- case SUBREG:
- if (!base)
- base = op;
- else if (!index)
- index = op;
- else
- return 0;
- break;
-
- case CONST:
- case CONST_INT:
- case SYMBOL_REF:
- case LABEL_REF:
- if (disp)
- return 0;
- disp = op;
- break;
-
- default:
- return 0;
- }
- }
- }
- else if (GET_CODE (addr) == MULT)
- {
- index = XEXP (addr, 0); /* index*scale */
- scale_rtx = XEXP (addr, 1);
- }
- else if (GET_CODE (addr) == ASHIFT)
- {
- rtx tmp;
-
- /* We're called for lea too, which implements ashift on occasion. */
- index = XEXP (addr, 0);
- tmp = XEXP (addr, 1);
- if (GET_CODE (tmp) != CONST_INT)
- return 0;
- scale = INTVAL (tmp);
- if ((unsigned HOST_WIDE_INT) scale > 3)
- return 0;
- scale = 1 << scale;
- retval = -1;
- }
- else
- disp = addr; /* displacement */
-
- /* Extract the integral value of scale. */
- if (scale_rtx)
- {
- if (GET_CODE (scale_rtx) != CONST_INT)
- return 0;
- scale = INTVAL (scale_rtx);
- }
-
- base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
- index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
-
-  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
- if (base_reg && index_reg && scale == 1
- && (index_reg == arg_pointer_rtx
- || index_reg == frame_pointer_rtx
- || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
- {
- rtx tmp;
- tmp = base, base = index, index = tmp;
- tmp = base_reg, base_reg = index_reg, index_reg = tmp;
- }
-
- /* Special case: %ebp cannot be encoded as a base without a displacement. */
- if ((base_reg == hard_frame_pointer_rtx
- || base_reg == frame_pointer_rtx
- || base_reg == arg_pointer_rtx) && !disp)
- disp = const0_rtx;
-
-  /* Special case: on K6, [%esi] causes the instruction to be vector
-     decoded.  Avoid this by transforming it to [%esi+0].  */
- if (ix86_tune == PROCESSOR_K6 && !optimize_size
- && base_reg && !index_reg && !disp
- && REG_P (base_reg)
- && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
- disp = const0_rtx;
-
- /* Special case: encode reg+reg instead of reg*2. */
- if (!base && index && scale && scale == 2)
- base = index, base_reg = index_reg, scale = 1;
-
- /* Special case: scaling cannot be encoded without base or displacement. */
- if (!base && !disp && index && scale != 1)
- disp = const0_rtx;
-
- out->base = base;
- out->index = index;
- out->disp = disp;
- out->scale = scale;
- out->seg = seg;
-
- return retval;
-}
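-
-/* A usage sketch (kept out of the build on purpose; ADDR and use_parts
-   are hypothetical): decomposing the address "8(%ebx,%esi,4)", i.e.
-   (plus (plus (mult (reg esi) (const_int 4)) (reg ebx)) (const_int 8)),
-   yields base = %ebx, index = %esi, scale = 4, disp = (const_int 8)
-   and a return value of 1.  */
-#if 0
-  {
-    struct ix86_address parts;
-    if (ix86_decompose_address (addr, &parts) > 0)
-      use_parts (&parts);
-  }
-#endif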
-
-/* Return the cost of the memory address X.
-   For i386, it is better to use a complex address than to let gcc copy
-   the address into a reg and make a new pseudo.  But not if the address
-   requires two regs - that would mean more pseudos with longer
-   lifetimes.  */
-static int
-ix86_address_cost (rtx x)
-{
- struct ix86_address parts;
- int cost = 1;
- int ok = ix86_decompose_address (x, &parts);
-
- gcc_assert (ok);
-
- if (parts.base && GET_CODE (parts.base) == SUBREG)
- parts.base = SUBREG_REG (parts.base);
- if (parts.index && GET_CODE (parts.index) == SUBREG)
- parts.index = SUBREG_REG (parts.index);
-
- /* More complex memory references are better. */
- if (parts.disp && parts.disp != const0_rtx)
- cost--;
- if (parts.seg != SEG_DEFAULT)
- cost--;
-
-  /* Attempt to minimize the number of registers in the address.  */
- if ((parts.base
- && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
- || (parts.index
- && (!REG_P (parts.index)
- || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
- cost++;
-
- if (parts.base
- && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
- && parts.index
- && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
- && parts.base != parts.index)
- cost++;
-
-  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
-     since its predecode logic can't detect the length of instructions
-     and decoding degenerates to vector decoded.  Increase the cost of
-     such addresses here.  The penalty is minimally 2 cycles.  It may be
-     worthwhile to split such addresses or even refuse them at all.
-
-     The following addressing modes are affected:
-      [base+scale*index]
-      [scale*index+disp]
-      [base+index]
-
-     The first and last cases may be avoidable by explicitly coding the
-     zero into the memory address, but I don't have an AMD-K6 machine
-     handy to check this theory.  */
-
- if (TARGET_K6
- && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
- || (parts.disp && !parts.base && parts.index && parts.scale != 1)
- || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
- cost += 10;
-
- return cost;
-}
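-
-/* Two worked examples of the cost computation above (a sketch assuming
-   32-bit code): for "8(%ebx)" the cost starts at 1 and the nonzero
-   displacement subtracts 1, giving 0; for a base-plus-index address
-   built from two distinct pseudos with no displacement, each of the two
-   pseudo-register checks adds 1, giving 1 + 1 + 1 == 3.  */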
-
-/* If X is a machine specific address (i.e. a symbol or label being
- referenced as a displacement from the GOT implemented using an
- UNSPEC), then return the base term. Otherwise return X. */
-
-rtx
-ix86_find_base_term (rtx x)
-{
- rtx term;
-
- if (TARGET_64BIT)
- {
- if (GET_CODE (x) != CONST)
- return x;
- term = XEXP (x, 0);
- if (GET_CODE (term) == PLUS
- && (GET_CODE (XEXP (term, 1)) == CONST_INT
- || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
- term = XEXP (term, 0);
- if (GET_CODE (term) != UNSPEC
- || XINT (term, 1) != UNSPEC_GOTPCREL)
- return x;
-
- term = XVECEXP (term, 0, 0);
-
- if (GET_CODE (term) != SYMBOL_REF
- && GET_CODE (term) != LABEL_REF)
- return x;
-
- return term;
- }
-
- term = ix86_delegitimize_address (x);
-
- if (GET_CODE (term) != SYMBOL_REF
- && GET_CODE (term) != LABEL_REF)
- return x;
-
- return term;
-}
-
-/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
-   this is used to form addresses to local data when -fPIC is in
-   use.  */
-
-static bool
-darwin_local_data_pic (rtx disp)
-{
- if (GET_CODE (disp) == MINUS)
- {
- if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
- || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
- if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
- {
- const char *sym_name = XSTR (XEXP (disp, 1), 0);
- if (! strcmp (sym_name, "<pic base>"))
- return true;
- }
- }
-
- return false;
-}
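-
-/* For instance (illustrative), the displacement
-     (minus (symbol_ref "_foo") (symbol_ref "<pic base>"))
-   is accepted above, while a MINUS whose second operand is any other
-   symbol is not.  */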
-
-/* Determine if a given RTX is a valid constant. We already know this
- satisfies CONSTANT_P. */
-
-bool
-legitimate_constant_p (rtx x)
-{
- switch (GET_CODE (x))
- {
- case CONST:
- x = XEXP (x, 0);
-
- if (GET_CODE (x) == PLUS)
- {
- if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- return false;
- x = XEXP (x, 0);
- }
-
- if (TARGET_MACHO && darwin_local_data_pic (x))
- return true;
-
- /* Only some unspecs are valid as "constants". */
- if (GET_CODE (x) == UNSPEC)
- switch (XINT (x, 1))
- {
- case UNSPEC_GOTOFF:
- return TARGET_64BIT;
- case UNSPEC_TPOFF:
- case UNSPEC_NTPOFF:
- x = XVECEXP (x, 0, 0);
- return (GET_CODE (x) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
- case UNSPEC_DTPOFF:
- x = XVECEXP (x, 0, 0);
- return (GET_CODE (x) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
- default:
- return false;
- }
-
- /* We must have drilled down to a symbol. */
- if (GET_CODE (x) == LABEL_REF)
- return true;
- if (GET_CODE (x) != SYMBOL_REF)
- return false;
- /* FALLTHRU */
-
- case SYMBOL_REF:
- /* TLS symbols are never valid. */
- if (SYMBOL_REF_TLS_MODEL (x))
- return false;
- /* APPLE LOCAL begin dynamic-no-pic */
-#if TARGET_MACHO
- if (TARGET_MACHO && MACHO_DYNAMIC_NO_PIC_P)
- return machopic_symbol_defined_p (x);
-#endif
- break;
-
- case PLUS:
- {
- rtx left = XEXP (x, 0);
- rtx right = XEXP (x, 1);
- bool left_is_constant = legitimate_constant_p (left);
- bool right_is_constant = legitimate_constant_p (right);
- return left_is_constant && right_is_constant;
- }
- break;
- /* APPLE LOCAL end dynamic-no-pic */
-
- case CONST_DOUBLE:
- if (GET_MODE (x) == TImode
- && x != CONST0_RTX (TImode)
- && !TARGET_64BIT)
- return false;
- break;
-
- case CONST_VECTOR:
- /* APPLE LOCAL begin radar 4874197 mainline candidate */
- if (standard_sse_constant_p (x))
- /* APPLE LOCAL end radar 4874197 mainline candidate */
- return true;
- return false;
-
- default:
- break;
- }
-
- /* Otherwise we handle everything else in the move patterns. */
- return true;
-}
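-
-/* Illustrations of the cases above (not exhaustive):
-   (const (plus (symbol_ref "x") (const_int 4))) drills down to the
-   SYMBOL_REF and is accepted for a non-TLS symbol, while a TImode
-   CONST_DOUBLE other than zero is rejected on 32-bit targets.  */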
-
-/* Determine if it's legal to put X into the constant pool. This
- is not possible for the address of thread-local symbols, which
- is checked above. */
-
-static bool
-ix86_cannot_force_const_mem (rtx x)
-{
- /* We can always put integral constants and vectors in memory. */
- switch (GET_CODE (x))
- {
- case CONST_INT:
- case CONST_DOUBLE:
- case CONST_VECTOR:
- return false;
-
- default:
- break;
- }
- return !legitimate_constant_p (x);
-}
-
-/* Determine if a given RTX is a valid constant address. */
-
-bool
-constant_address_p (rtx x)
-{
- return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
-}
-
-/* Nonzero if the constant value X is a legitimate general operand
- when generating PIC code. It is given that flag_pic is on and
- that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
-
-bool
-legitimate_pic_operand_p (rtx x)
-{
- rtx inner;
-
- switch (GET_CODE (x))
- {
- case CONST:
- inner = XEXP (x, 0);
- if (GET_CODE (inner) == PLUS
- && GET_CODE (XEXP (inner, 1)) == CONST_INT)
- inner = XEXP (inner, 0);
-
- /* Only some unspecs are valid as "constants". */
- if (GET_CODE (inner) == UNSPEC)
- switch (XINT (inner, 1))
- {
- case UNSPEC_GOTOFF:
- return TARGET_64BIT;
- case UNSPEC_TPOFF:
- x = XVECEXP (inner, 0, 0);
- return (GET_CODE (x) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
- default:
- return false;
- }
- /* FALLTHRU */
-
- case SYMBOL_REF:
- case LABEL_REF:
- return legitimate_pic_address_disp_p (x);
-
- default:
- return true;
- }
-}
-
-/* Determine if a given CONST RTX is a valid memory displacement
- in PIC mode. */
-
-int
-legitimate_pic_address_disp_p (rtx disp)
-{
- bool saw_plus;
-
- /* In 64bit mode we can allow direct addresses of symbols and labels
- when they are not dynamic symbols. */
- if (TARGET_64BIT)
- {
- rtx op0 = disp, op1;
-
- switch (GET_CODE (disp))
- {
- case LABEL_REF:
- return true;
-
- case CONST:
- if (GET_CODE (XEXP (disp, 0)) != PLUS)
- break;
- op0 = XEXP (XEXP (disp, 0), 0);
- op1 = XEXP (XEXP (disp, 0), 1);
- if (GET_CODE (op1) != CONST_INT
- || INTVAL (op1) >= 16*1024*1024
- || INTVAL (op1) < -16*1024*1024)
- break;
- if (GET_CODE (op0) == LABEL_REF)
- return true;
- if (GET_CODE (op0) != SYMBOL_REF)
- break;
- /* FALLTHRU */
-
- case SYMBOL_REF:
- /* TLS references should always be enclosed in UNSPEC. */
- if (SYMBOL_REF_TLS_MODEL (op0))
- return false;
- /* APPLE LOCAL begin fix-and-continue 6227434 */
-#if TARGET_MACHO
- if (machopic_data_defined_p (op0))
- return true;
-
- /* Under -mfix-and-continue, even local storage is
- addressed via the GOT, so that the value of local
- statics is preserved when a function is "fixed." */
- if (indirect_data (op0))
- return false;
-#endif
- /* APPLE LOCAL end fix-and-continue 6227434 */
- if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
- return true;
- break;
-
- default:
- break;
- }
- }
- if (GET_CODE (disp) != CONST)
- return 0;
- disp = XEXP (disp, 0);
-
- if (TARGET_64BIT)
- {
-      /* It is unsafe to allow PLUS expressions.  This limits the allowed
-         distance of GOT tables.  We should not need these anyway.  */
- if (GET_CODE (disp) != UNSPEC
- || (XINT (disp, 1) != UNSPEC_GOTPCREL
- && XINT (disp, 1) != UNSPEC_GOTOFF))
- return 0;
-
- if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
- && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
- return 0;
- return 1;
- }
-
- saw_plus = false;
- if (GET_CODE (disp) == PLUS)
- {
- if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
- return 0;
- disp = XEXP (disp, 0);
- saw_plus = true;
- }
-
- if (TARGET_MACHO && darwin_local_data_pic (disp))
- return 1;
-
- if (GET_CODE (disp) != UNSPEC)
- return 0;
-
- switch (XINT (disp, 1))
- {
- case UNSPEC_GOT:
- if (saw_plus)
- return false;
- return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
- case UNSPEC_GOTOFF:
-      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
-	 While the ABI also specifies a 32bit relocation, we don't produce
-	 it in the small PIC model at all.  */
- if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
- && !TARGET_64BIT)
- return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
- return false;
- case UNSPEC_GOTTPOFF:
- case UNSPEC_GOTNTPOFF:
- case UNSPEC_INDNTPOFF:
- if (saw_plus)
- return false;
- disp = XVECEXP (disp, 0, 0);
- return (GET_CODE (disp) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
- case UNSPEC_NTPOFF:
- disp = XVECEXP (disp, 0, 0);
- return (GET_CODE (disp) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
- case UNSPEC_DTPOFF:
- disp = XVECEXP (disp, 0, 0);
- return (GET_CODE (disp) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
- }
-
- return 0;
-}
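-
-/* By way of example (a sketch of the 32-bit case), a displacement such
-   as (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is accepted
-   when "foo" is a local symbol, per the UNSPEC_GOTOFF arm above, while
-   a bare (symbol_ref "foo") is rejected because it is not wrapped in a
-   CONST.  */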
-
-/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
- memory address for an instruction. The MODE argument is the machine mode
- for the MEM expression that wants to use this address.
-
-   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
- convert common non-canonical forms to canonical form so that they will
- be recognized. */
-
-int
-legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
-{
- struct ix86_address parts;
- rtx base, index, disp;
- HOST_WIDE_INT scale;
- const char *reason = NULL;
- rtx reason_rtx = NULL_RTX;
-
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr,
- "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
- GET_MODE_NAME (mode), strict);
- debug_rtx (addr);
- }
-
- if (ix86_decompose_address (addr, &parts) <= 0)
- {
- reason = "decomposition failed";
- goto report_error;
- }
-
- base = parts.base;
- index = parts.index;
- disp = parts.disp;
- scale = parts.scale;
-
- /* Validate base register.
-
- Don't allow SUBREG's that span more than a word here. It can lead to spill
- failures when the base is one word out of a two word structure, which is
- represented internally as a DImode int. */
-
- if (base)
- {
- rtx reg;
- reason_rtx = base;
-
- if (REG_P (base))
- reg = base;
- else if (GET_CODE (base) == SUBREG
- && REG_P (SUBREG_REG (base))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (base);
- else
- {
- reason = "base is not a register";
- goto report_error;
- }
-
- if (GET_MODE (base) != Pmode)
- {
- reason = "base is not in Pmode";
- goto report_error;
- }
-
- if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
- || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
- {
- reason = "base is not valid";
- goto report_error;
- }
- }
-
- /* Validate index register.
-
- Don't allow SUBREG's that span more than a word here -- same as above. */
-
- if (index)
- {
- rtx reg;
- reason_rtx = index;
-
- if (REG_P (index))
- reg = index;
- else if (GET_CODE (index) == SUBREG
- && REG_P (SUBREG_REG (index))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (index);
- else
- {
- reason = "index is not a register";
- goto report_error;
- }
-
- if (GET_MODE (index) != Pmode)
- {
- reason = "index is not in Pmode";
- goto report_error;
- }
-
- if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
- || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
- {
- reason = "index is not valid";
- goto report_error;
- }
- }
-
- /* Validate scale factor. */
- if (scale != 1)
- {
- reason_rtx = GEN_INT (scale);
- if (!index)
- {
- reason = "scale without index";
- goto report_error;
- }
-
- if (scale != 2 && scale != 4 && scale != 8)
- {
- reason = "scale is not a valid multiplier";
- goto report_error;
- }
- }
-
- /* Validate displacement. */
- if (disp)
- {
- reason_rtx = disp;
-
- if (GET_CODE (disp) == CONST
- && GET_CODE (XEXP (disp, 0)) == UNSPEC)
- switch (XINT (XEXP (disp, 0), 1))
- {
-	    /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
-	       when used.  While the ABI also specifies 32bit relocations, we
-	       don't produce them at all and use IP-relative addressing
-	       instead.  */
- case UNSPEC_GOT:
- case UNSPEC_GOTOFF:
- gcc_assert (flag_pic);
- if (!TARGET_64BIT)
- goto is_legitimate_pic;
- reason = "64bit address unspec";
- goto report_error;
-
- case UNSPEC_GOTPCREL:
- gcc_assert (flag_pic);
- goto is_legitimate_pic;
-
- case UNSPEC_GOTTPOFF:
- case UNSPEC_GOTNTPOFF:
- case UNSPEC_INDNTPOFF:
- case UNSPEC_NTPOFF:
- case UNSPEC_DTPOFF:
- break;
-
- default:
- reason = "invalid address unspec";
- goto report_error;
- }
-
- else if (SYMBOLIC_CONST (disp)
- && (flag_pic
- || (TARGET_MACHO
-#if TARGET_MACHO
- && MACHOPIC_INDIRECT
- && !machopic_operand_p (disp)
-#endif
- )))
- {
-
- is_legitimate_pic:
- if (TARGET_64BIT && (index || base))
- {
- /* foo@dtpoff(%rX) is ok. */
- if (GET_CODE (disp) != CONST
- || GET_CODE (XEXP (disp, 0)) != PLUS
- || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
- || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
- || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
- {
- reason = "non-constant pic memory reference";
- goto report_error;
- }
- }
- /* APPLE LOCAL begin dynamic-no-pic */
- else if (flag_pic && ! legitimate_pic_address_disp_p (disp))
- {
- reason = "displacement is an invalid pic construct";
- goto report_error;
- }
-#if TARGET_MACHO
- else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
- {
-	      reason = "displacement must be referenced via non_lazy_pointer";
- goto report_error;
- }
-#endif
- /* APPLE LOCAL end dynamic-no-pic */
-
- /* This code used to verify that a symbolic pic displacement
- includes the pic_offset_table_rtx register.
-
-	     While this is a good idea, unfortunately these constructs may
-	     be created by the "adds using lea" optimization for incorrect
-	     code like:
-
- int a;
- int foo(int i)
- {
- return *(&a+i);
- }
-
-	     This code is nonsensical, but results in addressing the
-	     GOT table with a pic_offset_table_rtx base.  We can't
-	     just refuse it easily, since it gets matched by the
-	     "addsi3" pattern, which later gets split to lea when the
-	     output register differs from the input.  While this
-	     could be handled by a separate addsi pattern for this case
-	     that never results in lea, disabling this test seems to be
-	     the easier and correct fix for the crash.  */
- }
- else if (GET_CODE (disp) != LABEL_REF
- && GET_CODE (disp) != CONST_INT
- && (GET_CODE (disp) != CONST
- || !legitimate_constant_p (disp))
- && (GET_CODE (disp) != SYMBOL_REF
- || !legitimate_constant_p (disp)))
- {
- reason = "displacement is not constant";
- goto report_error;
- }
- else if (TARGET_64BIT
- && !x86_64_immediate_operand (disp, VOIDmode))
- {
- reason = "displacement is out of range";
- goto report_error;
- }
- }
-
- /* Everything looks valid. */
- if (TARGET_DEBUG_ADDR)
- fprintf (stderr, "Success.\n");
- return TRUE;
-
- report_error:
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "Error: %s\n", reason);
- debug_rtx (reason_rtx);
- }
- return FALSE;
-}
-
-/* Return a unique alias set for the GOT. */
-
-static HOST_WIDE_INT
-ix86_GOT_alias_set (void)
-{
- static HOST_WIDE_INT set = -1;
- if (set == -1)
- set = new_alias_set ();
- return set;
-}
-
-/* Return a legitimate reference for ORIG (an address) using the
- register REG. If REG is 0, a new pseudo is generated.
-
- There are two types of references that must be handled:
-
- 1. Global data references must load the address from the GOT, via
- the PIC reg. An insn is emitted to do this load, and the reg is
- returned.
-
- 2. Static data references, constant pool addresses, and code labels
- compute the address as an offset from the GOT, whose base is in
- the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
- differentiate them from global data objects. The returned
- address is the PIC reg + an unspec constant.
-
- GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
- reg also appears in the address. */
-
-static rtx
-legitimize_pic_address (rtx orig, rtx reg)
-{
- rtx addr = orig;
- rtx new = orig;
- rtx base;
-
-#if TARGET_MACHO
- if (TARGET_MACHO && !TARGET_64BIT)
- {
- if (reg == 0)
- reg = gen_reg_rtx (Pmode);
- /* Use the generic Mach-O PIC machinery. */
- return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
- }
-#endif
-
- if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
- new = addr;
- else if (TARGET_64BIT
- && ix86_cmodel != CM_SMALL_PIC
- && local_symbolic_operand (addr, Pmode))
- {
- rtx tmpreg;
- /* This symbol may be referenced via a displacement from the PIC
- base address (@GOTOFF). */
-
- if (reload_in_progress)
- regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
- if (GET_CODE (addr) == CONST)
- addr = XEXP (addr, 0);
- if (GET_CODE (addr) == PLUS)
- {
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
- new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
- }
- else
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
- new = gen_rtx_CONST (Pmode, new);
- if (!reg)
- tmpreg = gen_reg_rtx (Pmode);
- else
- tmpreg = reg;
- emit_move_insn (tmpreg, new);
-
- if (reg != 0)
- {
- new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
- tmpreg, 1, OPTAB_DIRECT);
- new = reg;
- }
- else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
- }
- else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
- {
- /* This symbol may be referenced via a displacement from the PIC
- base address (@GOTOFF). */
-
- if (reload_in_progress)
- regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
- if (GET_CODE (addr) == CONST)
- addr = XEXP (addr, 0);
- if (GET_CODE (addr) == PLUS)
- {
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
- new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
- }
- else
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
- new = gen_rtx_CONST (Pmode, new);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
-
- if (reg != 0)
- {
- emit_move_insn (reg, new);
- new = reg;
- }
- }
- else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
- {
- if (TARGET_64BIT)
- {
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
- new = gen_rtx_CONST (Pmode, new);
- new = gen_const_mem (Pmode, new);
- set_mem_alias_set (new, ix86_GOT_alias_set ());
-
- if (reg == 0)
- reg = gen_reg_rtx (Pmode);
-	  /* Use gen_movsi directly; otherwise the address is loaded
-	     into a register for CSE.  We don't want to CSE these addresses;
-	     instead we CSE addresses from the GOT table, so skip this.  */
- emit_insn (gen_movsi (reg, new));
- new = reg;
- }
- else
- {
- /* This symbol must be referenced via a load from the
- Global Offset Table (@GOT). */
-
- if (reload_in_progress)
- regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
- new = gen_rtx_CONST (Pmode, new);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
- new = gen_const_mem (Pmode, new);
- set_mem_alias_set (new, ix86_GOT_alias_set ());
-
- if (reg == 0)
- reg = gen_reg_rtx (Pmode);
- emit_move_insn (reg, new);
- new = reg;
- }
- }
- else
- {
- if (GET_CODE (addr) == CONST_INT
- && !x86_64_immediate_operand (addr, VOIDmode))
- {
- if (reg)
- {
- emit_move_insn (reg, addr);
- new = reg;
- }
- else
- new = force_reg (Pmode, addr);
- }
- else if (GET_CODE (addr) == CONST)
- {
- addr = XEXP (addr, 0);
-
- /* We must match stuff we generate before. Assume the only
- unspecs that can get here are ours. Not that we could do
- anything with them anyway.... */
- if (GET_CODE (addr) == UNSPEC
- || (GET_CODE (addr) == PLUS
- && GET_CODE (XEXP (addr, 0)) == UNSPEC))
- return orig;
- gcc_assert (GET_CODE (addr) == PLUS);
- }
- if (GET_CODE (addr) == PLUS)
- {
- rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
-
- /* Check first to see if this is a constant offset from a @GOTOFF
- symbol reference. */
- if (local_symbolic_operand (op0, Pmode)
- && GET_CODE (op1) == CONST_INT)
- {
- if (!TARGET_64BIT)
- {
- if (reload_in_progress)
- regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
- new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
- UNSPEC_GOTOFF);
- new = gen_rtx_PLUS (Pmode, new, op1);
- new = gen_rtx_CONST (Pmode, new);
- new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
-
- if (reg != 0)
- {
- emit_move_insn (reg, new);
- new = reg;
- }
- }
- else
- {
- if (INTVAL (op1) < -16*1024*1024
- || INTVAL (op1) >= 16*1024*1024)
- {
- if (!x86_64_immediate_operand (op1, Pmode))
- op1 = force_reg (Pmode, op1);
- new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
- }
- }
- }
- else
- {
- base = legitimize_pic_address (XEXP (addr, 0), reg);
- new = legitimize_pic_address (XEXP (addr, 1),
- base == reg ? NULL_RTX : reg);
-
- if (GET_CODE (new) == CONST_INT)
- new = plus_constant (base, INTVAL (new));
- else
- {
- if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
- {
- base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
- new = XEXP (new, 1);
- }
- new = gen_rtx_PLUS (Pmode, base, new);
- /* APPLE LOCAL begin fix-and-continue 6358507 */
- if (!legitimate_address_p (Pmode, new, FALSE))
- new = force_reg (Pmode, new);
- /* APPLE LOCAL end fix-and-continue 6358507 */
- }
- }
- }
- }
- return new;
-}
-
-/* Load the thread pointer. If TO_REG is true, force it into a register. */
-
-static rtx
-get_thread_pointer (int to_reg)
-{
- rtx tp, reg, insn;
-
- tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
- if (!to_reg)
- return tp;
-
- reg = gen_reg_rtx (Pmode);
- insn = gen_rtx_SET (VOIDmode, reg, tp);
- insn = emit_insn (insn);
-
- return reg;
-}
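-
-/* For illustration: UNSPEC_TP stands for the thread pointer itself.
-   On GNU/Linux targets it ends up as a segment-relative reference
-   (%gs:0 in 32-bit code, %fs:0 in 64-bit code), so loading it into a
-   register is simply "movl %gs:0, %eax" or the %fs equivalent.  */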
-
-/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
- false if we expect this to be used for a memory address and true if
- we expect to load the address into a register. */
-
-static rtx
-legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
-{
- rtx dest, base, off, pic, tp;
- int type;
-
- switch (model)
- {
- case TLS_MODEL_GLOBAL_DYNAMIC:
- dest = gen_reg_rtx (Pmode);
- tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
-
- if (TARGET_64BIT && ! TARGET_GNU2_TLS)
- {
- rtx rax = gen_rtx_REG (Pmode, 0), insns;
-
- start_sequence ();
- emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
- insns = get_insns ();
- end_sequence ();
-
- emit_libcall_block (insns, dest, rax, x);
- }
- else if (TARGET_64BIT && TARGET_GNU2_TLS)
- emit_insn (gen_tls_global_dynamic_64 (dest, x));
- else
- emit_insn (gen_tls_global_dynamic_32 (dest, x));
-
- if (TARGET_GNU2_TLS)
- {
- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
-
- set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
- }
- break;
-
- case TLS_MODEL_LOCAL_DYNAMIC:
- base = gen_reg_rtx (Pmode);
- tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
-
- if (TARGET_64BIT && ! TARGET_GNU2_TLS)
- {
- rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
-
- start_sequence ();
- emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
- insns = get_insns ();
- end_sequence ();
-
- note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
- note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
- emit_libcall_block (insns, base, rax, note);
- }
- else if (TARGET_64BIT && TARGET_GNU2_TLS)
- emit_insn (gen_tls_local_dynamic_base_64 (base));
- else
- emit_insn (gen_tls_local_dynamic_base_32 (base));
-
- if (TARGET_GNU2_TLS)
- {
- rtx x = ix86_tls_module_base ();
-
- set_unique_reg_note (get_last_insn (), REG_EQUIV,
- gen_rtx_MINUS (Pmode, x, tp));
- }
-
- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
- off = gen_rtx_CONST (Pmode, off);
-
- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
-
- if (TARGET_GNU2_TLS)
- {
- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
-
- set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
- }
-
- break;
-
- case TLS_MODEL_INITIAL_EXEC:
- if (TARGET_64BIT)
- {
- pic = NULL;
- type = UNSPEC_GOTNTPOFF;
- }
- else if (flag_pic)
- {
- if (reload_in_progress)
- regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
- pic = pic_offset_table_rtx;
- type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
- }
- else if (!TARGET_ANY_GNU_TLS)
- {
- pic = gen_reg_rtx (Pmode);
- emit_insn (gen_set_got (pic));
- type = UNSPEC_GOTTPOFF;
- }
- else
- {
- pic = NULL;
- type = UNSPEC_INDNTPOFF;
- }
-
- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
- off = gen_rtx_CONST (Pmode, off);
- if (pic)
- off = gen_rtx_PLUS (Pmode, pic, off);
- off = gen_const_mem (Pmode, off);
- set_mem_alias_set (off, ix86_GOT_alias_set ());
-
- if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
- {
- base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
- off = force_reg (Pmode, off);
- return gen_rtx_PLUS (Pmode, base, off);
- }
- else
- {
- base = get_thread_pointer (true);
- dest = gen_reg_rtx (Pmode);
- emit_insn (gen_subsi3 (dest, base, off));
- }
- break;
-
- case TLS_MODEL_LOCAL_EXEC:
- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
- (TARGET_64BIT || TARGET_ANY_GNU_TLS)
- ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
- off = gen_rtx_CONST (Pmode, off);
-
- if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
- {
- base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
- return gen_rtx_PLUS (Pmode, base, off);
- }
- else
- {
- base = get_thread_pointer (true);
- dest = gen_reg_rtx (Pmode);
- emit_insn (gen_subsi3 (dest, base, off));
- }
- break;
-
- default:
- gcc_unreachable ();
- }
-
- return dest;
-}
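-
-/* A rough sketch of the four models handled above, per the ELF TLS ABI:
-   global dynamic calls the tls_get_addr runtime routine for each
-   variable; local dynamic calls it once per module and adds per-variable
-   @DTPOFF offsets; initial exec loads the variable's offset from the
-   thread pointer out of the GOT; local exec folds that offset in as a
-   link-time constant.  */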
-
-/* Try machine-dependent ways of modifying an illegitimate address
-   to be legitimate.  If we find one, return the new, valid address.
-   This function is used in only one place: `memory_address' in explow.c.
-
-   OLDX is the address as it was before break_out_memory_refs was called.
-   In some cases it is useful to look at this to decide what needs to be done.
-
-   MODE is passed so that this function can use GO_IF_LEGITIMATE_ADDRESS.
-
-   It is always safe for this function to do nothing.  It exists to recognize
-   opportunities to optimize the output.
-
- For the 80386, we handle X+REG by loading X into a register R and
- using R+REG. R will go in a general reg and indexing will be used.
- However, if REG is a broken-out memory address or multiplication,
- nothing needs to be done because REG can certainly go in a general reg.
-
- When -fpic is used, special handling is needed for symbolic references.
- See comments by legitimize_pic_address in i386.c for details. */
-
-rtx
-legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
-{
- int changed = 0;
- unsigned log;
-
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
- GET_MODE_NAME (mode));
- debug_rtx (x);
- }
-
- log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
- if (log)
- return legitimize_tls_address (x, log, false);
- if (GET_CODE (x) == CONST
- && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
- && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
- {
- rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
- }
-
- if (flag_pic && SYMBOLIC_CONST (x))
- return legitimize_pic_address (x, 0);
- /* APPLE LOCAL begin dynamic-no-pic */
-#if TARGET_MACHO
- if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
- return machopic_indirect_data_reference (x, 0);
-#endif
- /* APPLE LOCAL end dynamic-no-pic */
-
-  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
- if (GET_CODE (x) == ASHIFT
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
- {
- changed = 1;
- log = INTVAL (XEXP (x, 1));
- x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
- GEN_INT (1 << log));
- }
-
- if (GET_CODE (x) == PLUS)
- {
- /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
-
- if (GET_CODE (XEXP (x, 0)) == ASHIFT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
- {
- changed = 1;
- log = INTVAL (XEXP (XEXP (x, 0), 1));
- XEXP (x, 0) = gen_rtx_MULT (Pmode,
- force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
- GEN_INT (1 << log));
- }
-
- if (GET_CODE (XEXP (x, 1)) == ASHIFT
- && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
- {
- changed = 1;
- log = INTVAL (XEXP (XEXP (x, 1), 1));
- XEXP (x, 1) = gen_rtx_MULT (Pmode,
- force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
- GEN_INT (1 << log));
- }
-
- /* Put multiply first if it isn't already. */
- if (GET_CODE (XEXP (x, 1)) == MULT)
- {
- rtx tmp = XEXP (x, 0);
- XEXP (x, 0) = XEXP (x, 1);
- XEXP (x, 1) = tmp;
- changed = 1;
- }
-
- /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
- into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
- created by virtual register instantiation, register elimination, and
- similar optimizations. */
- if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
- {
- changed = 1;
- x = gen_rtx_PLUS (Pmode,
- gen_rtx_PLUS (Pmode, XEXP (x, 0),
- XEXP (XEXP (x, 1), 0)),
- XEXP (XEXP (x, 1), 1));
- }
-
- /* Canonicalize
- (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
- into (plus (plus (mult (reg) (const)) (reg)) (const)). */
- else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
- && CONSTANT_P (XEXP (x, 1)))
- {
- rtx constant;
- rtx other = NULL_RTX;
-
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- {
- constant = XEXP (x, 1);
- other = XEXP (XEXP (XEXP (x, 0), 1), 1);
- }
- else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
- {
- constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
- other = XEXP (x, 1);
- }
- else
- constant = 0;
-
- if (constant)
- {
- changed = 1;
- x = gen_rtx_PLUS (Pmode,
- gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
- XEXP (XEXP (XEXP (x, 0), 1), 0)),
- plus_constant (other, INTVAL (constant)));
- }
- }
-
- if (changed && legitimate_address_p (mode, x, FALSE))
- return x;
-
- if (GET_CODE (XEXP (x, 0)) == MULT)
- {
- changed = 1;
- XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
- }
-
- if (GET_CODE (XEXP (x, 1)) == MULT)
- {
- changed = 1;
- XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
- }
-
- if (changed
- && GET_CODE (XEXP (x, 1)) == REG
- && GET_CODE (XEXP (x, 0)) == REG)
- return x;
-
- if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
- {
- changed = 1;
- x = legitimize_pic_address (x, 0);
- }
-
- if (changed && legitimate_address_p (mode, x, FALSE))
- return x;
-
- if (GET_CODE (XEXP (x, 0)) == REG)
- {
- rtx temp = gen_reg_rtx (Pmode);
- rtx val = force_operand (XEXP (x, 1), temp);
- if (val != temp)
- emit_move_insn (temp, val);
-
- XEXP (x, 1) = temp;
- return x;
- }
-
- else if (GET_CODE (XEXP (x, 1)) == REG)
- {
- rtx temp = gen_reg_rtx (Pmode);
- rtx val = force_operand (XEXP (x, 0), temp);
- if (val != temp)
- emit_move_insn (temp, val);
-
- XEXP (x, 0) = temp;
- return x;
- }
- }
-
- return x;
-}
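-
-/* For example, given (plus (ashift (reg A) (const_int 2)) (reg B)),
-   the routine above rewrites the shift as (mult (reg A) (const_int 4)),
-   which matches the scaled-index form of the x86 addressing modes and
-   prints as (%B,%A,4) in AT&T syntax.  */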
-
-/* Print an integer constant expression in assembler syntax. Addition
- and subtraction are the only arithmetic that may appear in these
- expressions. FILE is the stdio stream to write to, X is the rtx, and
- CODE is the operand print code from the output string. */
-
-static void
-output_pic_addr_const (FILE *file, rtx x, int code)
-{
- char buf[256];
-
- switch (GET_CODE (x))
- {
- case PC:
- gcc_assert (flag_pic);
- putc ('.', file);
- break;
-
- case SYMBOL_REF:
- /* APPLE LOCAL begin axe stubs 5571540 */
- if (! TARGET_MACHO ||
-#if TARGET_MACHO
- ! darwin_stubs ||
-#endif
- TARGET_64BIT)
- /* APPLE LOCAL end axe stubs 5571540 */
- output_addr_const (file, x);
- else
- {
- const char *name = XSTR (x, 0);
-
- /* Mark the decl as referenced so that cgraph will output the function. */
- if (SYMBOL_REF_DECL (x))
- mark_decl_referenced (SYMBOL_REF_DECL (x));
-
-#if TARGET_MACHO
- if (MACHOPIC_INDIRECT
- && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
- name = machopic_indirection_name (x, /*stub_p=*/true);
-#endif
- assemble_name (file, name);
- }
- if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
- fputs ("@PLT", file);
- break;
-
- case LABEL_REF:
- x = XEXP (x, 0);
- /* FALLTHRU */
- case CODE_LABEL:
- ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
- assemble_name (asm_out_file, buf);
- break;
-
- case CONST_INT:
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
- break;
-
- case CONST:
- /* This used to output parentheses around the expression,
- but that does not work on the 386 (either ATT or BSD assembler). */
- output_pic_addr_const (file, XEXP (x, 0), code);
- break;
-
- case CONST_DOUBLE:
- if (GET_MODE (x) == VOIDmode)
- {
- /* We can use %d if the number is <32 bits and positive. */
- if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
- fprintf (file, "0x%lx%08lx",
- (unsigned long) CONST_DOUBLE_HIGH (x),
- (unsigned long) CONST_DOUBLE_LOW (x));
- else
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
- }
- else
- /* We can't handle floating point constants;
- PRINT_OPERAND must handle them. */
- output_operand_lossage ("floating constant misused");
- break;
-
- case PLUS:
- /* Some assemblers need integer constants to appear first. */
- if (GET_CODE (XEXP (x, 0)) == CONST_INT)
- {
- output_pic_addr_const (file, XEXP (x, 0), code);
- putc ('+', file);
- output_pic_addr_const (file, XEXP (x, 1), code);
- }
- else
- {
- gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
- output_pic_addr_const (file, XEXP (x, 1), code);
- putc ('+', file);
- output_pic_addr_const (file, XEXP (x, 0), code);
- }
- break;
-
- case MINUS:
- if (!TARGET_MACHO)
- putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
- output_pic_addr_const (file, XEXP (x, 0), code);
- putc ('-', file);
- output_pic_addr_const (file, XEXP (x, 1), code);
- if (!TARGET_MACHO)
- putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
- break;
-
- case UNSPEC:
- gcc_assert (XVECLEN (x, 0) == 1);
- output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
- switch (XINT (x, 1))
- {
- case UNSPEC_GOT:
- fputs ("@GOT", file);
- break;
- case UNSPEC_GOTOFF:
- fputs ("@GOTOFF", file);
- break;
- case UNSPEC_GOTPCREL:
- fputs ("@GOTPCREL(%rip)", file);
- break;
- case UNSPEC_GOTTPOFF:
- /* FIXME: This might be @TPOFF in Sun ld too. */
- fputs ("@GOTTPOFF", file);
- break;
- case UNSPEC_TPOFF:
- fputs ("@TPOFF", file);
- break;
- case UNSPEC_NTPOFF:
- if (TARGET_64BIT)
- fputs ("@TPOFF", file);
- else
- fputs ("@NTPOFF", file);
- break;
- case UNSPEC_DTPOFF:
- fputs ("@DTPOFF", file);
- break;
- case UNSPEC_GOTNTPOFF:
- if (TARGET_64BIT)
- fputs ("@GOTTPOFF(%rip)", file);
- else
- fputs ("@GOTNTPOFF", file);
- break;
- case UNSPEC_INDNTPOFF:
- fputs ("@INDNTPOFF", file);
- break;
- default:
- output_operand_lossage ("invalid UNSPEC as operand");
- break;
- }
- break;
-
- default:
- output_operand_lossage ("invalid expression as operand");
- }
-}
-
-/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
- We need to emit DTP-relative relocations. */
-
-static void
-i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
-{
- fputs (ASM_LONG, file);
- output_addr_const (file, x);
- fputs ("@DTPOFF", file);
- switch (size)
- {
- case 4:
- break;
- case 8:
- fputs (", 0", file);
- break;
- default:
- gcc_unreachable ();
- }
-}
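-
-/* Concretely, assuming ASM_LONG is the usual "\t.long\t", the routine
-   above emits ".long x@DTPOFF" for a 4-byte relocation against `x',
-   and ".long x@DTPOFF, 0" for the 8-byte case.  */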
-
-/* In the name of slightly smaller debug output, and to cater to
- general assembler lossage, recognize PIC+GOTOFF and turn it back
- into a direct symbol reference.
-
- On Darwin, this is necessary to avoid a crash, because Darwin
- has a different PIC label for each routine but the DWARF debugging
- information is not associated with any particular routine, so it's
- necessary to remove references to the PIC label from RTL stored by
- the DWARF output code. */
-
-static rtx
-ix86_delegitimize_address (rtx orig_x)
-{
- rtx x = orig_x;
- /* reg_addend is NULL or a multiple of some register. */
- rtx reg_addend = NULL_RTX;
- /* const_addend is NULL or a const_int. */
- rtx const_addend = NULL_RTX;
- /* This is the result, or NULL. */
- rtx result = NULL_RTX;
-
- if (GET_CODE (x) == MEM)
- x = XEXP (x, 0);
-
- if (TARGET_64BIT)
- {
- if (GET_CODE (x) != CONST
- || GET_CODE (XEXP (x, 0)) != UNSPEC
- || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
- || GET_CODE (orig_x) != MEM)
- return orig_x;
- return XVECEXP (XEXP (x, 0), 0, 0);
- }
-
- if (GET_CODE (x) != PLUS
- || GET_CODE (XEXP (x, 1)) != CONST)
- return orig_x;
-
- if (GET_CODE (XEXP (x, 0)) == REG
- && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
- /* %ebx + GOT/GOTOFF */
- ;
- else if (GET_CODE (XEXP (x, 0)) == PLUS)
- {
- /* %ebx + %reg * scale + GOT/GOTOFF */
- reg_addend = XEXP (x, 0);
- if (GET_CODE (XEXP (reg_addend, 0)) == REG
- && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
- reg_addend = XEXP (reg_addend, 1);
- else if (GET_CODE (XEXP (reg_addend, 1)) == REG
- && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
- reg_addend = XEXP (reg_addend, 0);
- else
- return orig_x;
- if (GET_CODE (reg_addend) != REG
- && GET_CODE (reg_addend) != MULT
- && GET_CODE (reg_addend) != ASHIFT)
- return orig_x;
- }
- else
- return orig_x;
-
- x = XEXP (XEXP (x, 1), 0);
- if (GET_CODE (x) == PLUS
- && GET_CODE (XEXP (x, 1)) == CONST_INT)
- {
- const_addend = XEXP (x, 1);
- x = XEXP (x, 0);
- }
-
- if (GET_CODE (x) == UNSPEC
- && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
- || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
- result = XVECEXP (x, 0, 0);
-
- if (TARGET_MACHO && darwin_local_data_pic (x)
- && GET_CODE (orig_x) != MEM)
- result = XEXP (x, 0);
-
- if (! result)
- return orig_x;
-
- if (const_addend)
- result = gen_rtx_PLUS (Pmode, result, const_addend);
- if (reg_addend)
- result = gen_rtx_PLUS (Pmode, reg_addend, result);
- return result;
-}
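-
-/* For example, (plus (reg %ebx) (const (unspec [sym] UNSPEC_GOTOFF)))
-   is turned back into plain `sym', with any constant or register
-   addend that had been folded into the address re-applied on top.  */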
-
-static void
-put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
- int fp, FILE *file)
-{
- const char *suffix;
-
- if (mode == CCFPmode || mode == CCFPUmode)
- {
- enum rtx_code second_code, bypass_code;
- ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
- gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
- code = ix86_fp_compare_code_to_integer (code);
- mode = CCmode;
- }
- if (reverse)
- code = reverse_condition (code);
-
- switch (code)
- {
- case EQ:
- suffix = "e";
- break;
- case NE:
- suffix = "ne";
- break;
- case GT:
- gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
- suffix = "g";
- break;
- case GTU:
- /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
- Those same assemblers have the same but opposite lossage on cmov. */
- gcc_assert (mode == CCmode);
- suffix = fp ? "nbe" : "a";
- break;
- case LT:
- switch (mode)
- {
- case CCNOmode:
- case CCGOCmode:
- suffix = "s";
- break;
-
- case CCmode:
- case CCGCmode:
- suffix = "l";
- break;
-
- default:
- gcc_unreachable ();
- }
- break;
- case LTU:
- gcc_assert (mode == CCmode);
- suffix = "b";
- break;
- case GE:
- switch (mode)
- {
- case CCNOmode:
- case CCGOCmode:
- suffix = "ns";
- break;
-
- case CCmode:
- case CCGCmode:
- suffix = "ge";
- break;
-
- default:
- gcc_unreachable ();
- }
- break;
- case GEU:
- /* ??? As above. */
- gcc_assert (mode == CCmode);
- suffix = fp ? "nb" : "ae";
- break;
- case LE:
- gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
- suffix = "le";
- break;
- case LEU:
- gcc_assert (mode == CCmode);
- suffix = "be";
- break;
- case UNORDERED:
- suffix = fp ? "u" : "p";
- break;
- case ORDERED:
- suffix = fp ? "nu" : "np";
- break;
- default:
- gcc_unreachable ();
- }
- fputs (suffix, file);
-}
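-
-/* For example, EQ yields the "e" suffix, so a set/cmov printed with
-   %C becomes "sete"/"cmove"; %c would print the reversed "setne".  */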
-
-/* Print the name of register X to FILE based on its machine mode and number.
- If CODE is 'w', pretend the mode is HImode.
- If CODE is 'b', pretend the mode is QImode.
- If CODE is 'k', pretend the mode is SImode.
- If CODE is 'q', pretend the mode is DImode.
- If CODE is 'h', pretend the reg is the 'high' byte register.
- If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
-
-void
-print_reg (rtx x, int code, FILE *file)
-{
- gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
- && REGNO (x) != FRAME_POINTER_REGNUM
- && REGNO (x) != FLAGS_REG
- && REGNO (x) != FPSR_REG);
-
- if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
- putc ('%', file);
-
- if (code == 'w' || MMX_REG_P (x))
- code = 2;
- else if (code == 'b')
- code = 1;
- else if (code == 'k')
- code = 4;
- else if (code == 'q')
- code = 8;
- else if (code == 'y')
- code = 3;
- else if (code == 'h')
- code = 0;
- else
- code = GET_MODE_SIZE (GET_MODE (x));
-
-  /* Irritatingly, AMD extended registers use a different naming
-     convention from the normal registers.  */
- if (REX_INT_REG_P (x))
- {
- gcc_assert (TARGET_64BIT);
- switch (code)
- {
- case 0:
- error ("extended registers have no high halves");
- break;
- case 1:
- fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
- break;
- case 2:
- fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
- break;
- case 4:
- fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
- break;
- case 8:
- fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
- break;
- default:
- error ("unsupported operand size for extended register");
- break;
- }
- return;
- }
- switch (code)
- {
- case 3:
- if (STACK_TOP_P (x))
- {
- fputs ("st(0)", file);
- break;
- }
- /* FALLTHRU */
- case 8:
- case 4:
- case 12:
- if (! ANY_FP_REG_P (x))
- putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
- /* FALLTHRU */
- case 16:
- case 2:
- normal:
- fputs (hi_reg_name[REGNO (x)], file);
- break;
- case 1:
- if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
- goto normal;
- fputs (qi_reg_name[REGNO (x)], file);
- break;
- case 0:
- if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
- goto normal;
- fputs (qi_high_reg_name[REGNO (x)], file);
- break;
- default:
- gcc_unreachable ();
- }
-}
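-
-/* For example, register 0 (ax) prints as %al, %ax, %eax or %rax under
-   the codes 'b', 'w', 'k' and 'q' respectively, and as %ah under 'h'.  */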
-
-/* Locate some local-dynamic symbol still in use by this function
- so that we can print its name in some tls_local_dynamic_base
- pattern. */
-
-static const char *
-get_some_local_dynamic_name (void)
-{
- rtx insn;
-
- if (cfun->machine->some_ld_name)
- return cfun->machine->some_ld_name;
-
- for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
- if (INSN_P (insn)
- && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
- return cfun->machine->some_ld_name;
-
- gcc_unreachable ();
-}
-
-static int
-get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
- rtx x = *px;
-
- if (GET_CODE (x) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
- {
- cfun->machine->some_ld_name = XSTR (x, 0);
- return 1;
- }
-
- return 0;
-}
-
-/* Meaning of CODE:
- L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
- C -- print opcode suffix for set/cmov insn.
- c -- like C, but print reversed condition
- F,f -- likewise, but for floating-point.
- O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
- otherwise nothing
- R -- print the prefix for register names.
- z -- print the opcode suffix for the size of the current operand.
- * -- print a star (in certain assembler syntax)
- A -- print an absolute memory reference.
- w -- print the operand as if it's a "word" (HImode) even if it isn't.
-   s -- print a shift double count, followed by the assembler's argument
-        delimiter.
- b -- print the QImode name of the register for the indicated operand.
- %b0 would print %al if operands[0] is reg 0.
- w -- likewise, print the HImode name of the register.
- k -- likewise, print the SImode name of the register.
- q -- likewise, print the DImode name of the register.
- h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
- y -- print "st(0)" instead of "st" as a register.
- D -- print condition for SSE cmp instruction.
- P -- if PIC, print an @PLT suffix.
- X -- don't print any sort of PIC '@' suffix for a symbol.
- & -- print some in-use local-dynamic symbol name.
-   H -- print a memory address offset by 8; used for SSE high parts.
- */
-
-void
-print_operand (FILE *file, rtx x, int code)
-{
- if (code)
- {
- switch (code)
- {
- case '*':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('*', file);
- return;
-
- case '&':
- assemble_name (file, get_some_local_dynamic_name ());
- return;
-
- case 'A':
- switch (ASSEMBLER_DIALECT)
- {
- case ASM_ATT:
- putc ('*', file);
- break;
-
- case ASM_INTEL:
-	  /* Intel syntax.  For absolute addresses, registers should not
-	     be surrounded by brackets.  */
- if (GET_CODE (x) != REG)
- {
- putc ('[', file);
- PRINT_OPERAND (file, x, 0);
- putc (']', file);
- return;
- }
- break;
-
- default:
- gcc_unreachable ();
- }
-
- PRINT_OPERAND (file, x, 0);
- return;
-
-
- case 'L':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('l', file);
- return;
-
- case 'W':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('w', file);
- return;
-
- case 'B':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('b', file);
- return;
-
- case 'Q':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('l', file);
- return;
-
- case 'S':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('s', file);
- return;
-
- case 'T':
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('t', file);
- return;
-
- case 'z':
- /* 387 opcodes don't get size suffixes if the operands are
- registers. */
- if (STACK_REG_P (x))
- return;
-
- /* Likewise if using Intel opcodes. */
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- return;
-
-	  /* Derive the opcode suffix from the size of the operand.  */
- switch (GET_MODE_SIZE (GET_MODE (x)))
- {
- case 2:
-#ifdef HAVE_GAS_FILDS_FISTS
- putc ('s', file);
-#endif
- return;
-
- case 4:
- if (GET_MODE (x) == SFmode)
- {
- putc ('s', file);
- return;
- }
- else
- putc ('l', file);
- return;
-
- case 12:
- case 16:
- putc ('t', file);
- return;
-
- case 8:
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
- {
-#ifdef GAS_MNEMONICS
- putc ('q', file);
-#else
- putc ('l', file);
- putc ('l', file);
-#endif
- }
- else
- putc ('l', file);
- return;
-
- default:
- gcc_unreachable ();
- }
-
- case 'b':
- case 'w':
- case 'k':
- case 'q':
- case 'h':
- case 'y':
- case 'X':
- case 'P':
- break;
-
- case 's':
- if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
- {
- PRINT_OPERAND (file, x, 0);
- putc (',', file);
- }
- return;
-
- case 'D':
-	  /* A little bit of braindamage here.  The SSE compare instructions
-	     use completely different names for the comparisons than the
-	     fp conditional moves do.  */
- switch (GET_CODE (x))
- {
- case EQ:
- case UNEQ:
- fputs ("eq", file);
- break;
- case LT:
- case UNLT:
- fputs ("lt", file);
- break;
- case LE:
- case UNLE:
- fputs ("le", file);
- break;
- case UNORDERED:
- fputs ("unord", file);
- break;
- case NE:
- case LTGT:
- fputs ("neq", file);
- break;
- case UNGE:
- case GE:
- fputs ("nlt", file);
- break;
- case UNGT:
- case GT:
- fputs ("nle", file);
- break;
- case ORDERED:
- fputs ("ord", file);
- break;
- default:
- gcc_unreachable ();
- }
- return;
- case 'O':
-#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
- if (ASSEMBLER_DIALECT == ASM_ATT)
- {
- switch (GET_MODE (x))
- {
- case HImode: putc ('w', file); break;
- case SImode:
- case SFmode: putc ('l', file); break;
- case DImode:
- case DFmode: putc ('q', file); break;
- default: gcc_unreachable ();
- }
- putc ('.', file);
- }
-#endif
- return;
- case 'C':
- put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
- return;
- case 'F':
-#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('.', file);
-#endif
- put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
- return;
-
-	  /* Like above, but reverse the condition.  */
- case 'c':
- /* Check to see if argument to %c is really a constant
- and not a condition code which needs to be reversed. */
- if (!COMPARISON_P (x))
- {
- output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
- return;
- }
- put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
- return;
- case 'f':
-#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('.', file);
-#endif
- put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
- return;
-
- case 'H':
- /* It doesn't actually matter what mode we use here, as we're
- only going to use this for printing. */
- x = adjust_address_nv (x, DImode, 8);
- break;
-
- case '+':
- {
- rtx x;
-
- if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
- return;
-
- x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
- if (x)
- {
- int pred_val = INTVAL (XEXP (x, 0));
-
- if (pred_val < REG_BR_PROB_BASE * 45 / 100
- || pred_val > REG_BR_PROB_BASE * 55 / 100)
- {
- int taken = pred_val > REG_BR_PROB_BASE / 2;
- int cputaken = final_forward_branch_p (current_output_insn) == 0;
-
- /* Emit hints only in the case default branch prediction
- heuristics would fail. */
- if (taken != cputaken)
- {
- /* We use 3e (DS) prefix for taken branches and
- 2e (CS) prefix for not taken branches. */
- if (taken)
- fputs ("ds ; ", file);
- else
- fputs ("cs ; ", file);
- }
- }
- }
- return;
- }
- default:
- output_operand_lossage ("invalid operand code '%c'", code);
- }
- }
-
- if (GET_CODE (x) == REG)
- print_reg (x, code, file);
-
- else if (GET_CODE (x) == MEM)
- {
- /* No `byte ptr' prefix for call instructions. */
- if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
- {
- const char * size;
- switch (GET_MODE_SIZE (GET_MODE (x)))
- {
- case 1: size = "BYTE"; break;
- case 2: size = "WORD"; break;
- case 4: size = "DWORD"; break;
- case 8: size = "QWORD"; break;
- case 12: size = "XWORD"; break;
- case 16: size = "XMMWORD"; break;
- default:
- gcc_unreachable ();
- }
-
- /* Check for explicit size override (codes 'b', 'w' and 'k') */
- if (code == 'b')
- size = "BYTE";
- else if (code == 'w')
- size = "WORD";
- else if (code == 'k')
- size = "DWORD";
-
- fputs (size, file);
- fputs (" PTR ", file);
- }
-
- x = XEXP (x, 0);
- /* Avoid (%rip) for call operands. */
- if (CONSTANT_ADDRESS_P (x) && code == 'P'
- && GET_CODE (x) != CONST_INT)
- output_addr_const (file, x);
- else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
- output_operand_lossage ("invalid constraints for operand");
- else
- output_address (x);
- }
-
- else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
- {
- REAL_VALUE_TYPE r;
- long l;
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, x);
- REAL_VALUE_TO_TARGET_SINGLE (r, l);
-
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('$', file);
- fprintf (file, "0x%08lx", l);
- }
-
- /* These float cases don't actually occur as immediate operands. */
- else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
- {
- char dstr[30];
-
- real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
- fprintf (file, "%s", dstr);
- }
-
- else if (GET_CODE (x) == CONST_DOUBLE
- && GET_MODE (x) == XFmode)
- {
- char dstr[30];
-
- real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
- fprintf (file, "%s", dstr);
- }
-
- else
- {
- /* We have patterns that allow zero sets of memory, for instance.
- In 64-bit mode, we should probably support all 8-byte vectors,
- since we can in fact encode that into an immediate. */
- if (GET_CODE (x) == CONST_VECTOR)
- {
- gcc_assert (x == CONST0_RTX (GET_MODE (x)));
- x = const0_rtx;
- }
-
- if (code != 'P')
- {
- if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
- {
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('$', file);
- }
- else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
- || GET_CODE (x) == LABEL_REF)
- {
- if (ASSEMBLER_DIALECT == ASM_ATT)
- putc ('$', file);
- else
- fputs ("OFFSET FLAT:", file);
- }
- }
- if (GET_CODE (x) == CONST_INT)
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
- /* APPLE LOCAL begin dynamic-no-pic */
- else if (flag_pic || (TARGET_MACHO && MACHOPIC_INDIRECT))
- /* APPLE LOCAL end dynamic-no-pic */
- output_pic_addr_const (file, x, code);
- else
- output_addr_const (file, x);
- }
-}
-
-/* Print a memory operand whose address is ADDR. */
-
-void
-print_operand_address (FILE *file, rtx addr)
-{
- struct ix86_address parts;
- rtx base, index, disp;
- int scale;
- int ok = ix86_decompose_address (addr, &parts);
-
- gcc_assert (ok);
-
- base = parts.base;
- index = parts.index;
- disp = parts.disp;
- scale = parts.scale;
-
- switch (parts.seg)
- {
- case SEG_DEFAULT:
- break;
- case SEG_FS:
- case SEG_GS:
- if (USER_LABEL_PREFIX[0] == 0)
- putc ('%', file);
- fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (!base && !index)
- {
-      /* A displacement-only address requires special attention.  */
-
- if (GET_CODE (disp) == CONST_INT)
- {
- if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
- {
- if (USER_LABEL_PREFIX[0] == 0)
- putc ('%', file);
- fputs ("ds:", file);
- }
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
- }
- else if (flag_pic)
- output_pic_addr_const (file, disp, 0);
- else
- output_addr_const (file, disp);
-
-      /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
- if (TARGET_64BIT)
- {
- if (GET_CODE (disp) == CONST
- && GET_CODE (XEXP (disp, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
- disp = XEXP (XEXP (disp, 0), 0);
- if (GET_CODE (disp) == LABEL_REF
- || (GET_CODE (disp) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (disp) == 0))
- fputs ("(%rip)", file);
- }
- }
- else
- {
- if (ASSEMBLER_DIALECT == ASM_ATT)
- {
- if (disp)
- {
- if (flag_pic)
- output_pic_addr_const (file, disp, 0);
- else if (GET_CODE (disp) == LABEL_REF)
- output_asm_label (disp);
- else
- output_addr_const (file, disp);
- }
-
- putc ('(', file);
- if (base)
- print_reg (base, 0, file);
- if (index)
- {
- putc (',', file);
- print_reg (index, 0, file);
- if (scale != 1)
- fprintf (file, ",%d", scale);
- }
- putc (')', file);
- }
- else
- {
- rtx offset = NULL_RTX;
-
- if (disp)
- {
- /* Pull out the offset of a symbol; print any symbol itself. */
- if (GET_CODE (disp) == CONST
- && GET_CODE (XEXP (disp, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
- {
- offset = XEXP (XEXP (disp, 0), 1);
- disp = gen_rtx_CONST (VOIDmode,
- XEXP (XEXP (disp, 0), 0));
- }
-
- if (flag_pic)
- output_pic_addr_const (file, disp, 0);
- else if (GET_CODE (disp) == LABEL_REF)
- output_asm_label (disp);
- else if (GET_CODE (disp) == CONST_INT)
- offset = disp;
- else
- output_addr_const (file, disp);
- }
-
- putc ('[', file);
- if (base)
- {
- print_reg (base, 0, file);
- if (offset)
- {
- if (INTVAL (offset) >= 0)
- putc ('+', file);
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
- }
- }
- else if (offset)
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
- else
- putc ('0', file);
-
- if (index)
- {
- putc ('+', file);
- print_reg (index, 0, file);
- if (scale != 1)
- fprintf (file, "*%d", scale);
- }
- putc (']', file);
- }
- }
-}
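-
-/* As a sketch: base %ebp, index %eax, scale 4, displacement -8 prints
-   as -8(%ebp,%eax,4) in the AT&T branch, while the Intel branch emits
-   the same address inside brackets, roughly [ebp-8+eax*4].  */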
-
-bool
-output_addr_const_extra (FILE *file, rtx x)
-{
- rtx op;
-
- if (GET_CODE (x) != UNSPEC)
- return false;
-
- op = XVECEXP (x, 0, 0);
- switch (XINT (x, 1))
- {
- case UNSPEC_GOTTPOFF:
- output_addr_const (file, op);
- /* FIXME: This might be @TPOFF in Sun ld. */
- fputs ("@GOTTPOFF", file);
- break;
- case UNSPEC_TPOFF:
- output_addr_const (file, op);
- fputs ("@TPOFF", file);
- break;
- case UNSPEC_NTPOFF:
- output_addr_const (file, op);
- if (TARGET_64BIT)
- fputs ("@TPOFF", file);
- else
- fputs ("@NTPOFF", file);
- break;
- case UNSPEC_DTPOFF:
- output_addr_const (file, op);
- fputs ("@DTPOFF", file);
- break;
- case UNSPEC_GOTNTPOFF:
- output_addr_const (file, op);
- if (TARGET_64BIT)
- fputs ("@GOTTPOFF(%rip)", file);
- else
- fputs ("@GOTNTPOFF", file);
- break;
- case UNSPEC_INDNTPOFF:
- output_addr_const (file, op);
- fputs ("@INDNTPOFF", file);
- break;
-
- default:
- return false;
- }
-
- return true;
-}
-
-/* Split one or more DImode RTL references into pairs of SImode
- references. The RTL can be REG, offsettable MEM, integer constant, or
- CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
- split and "num" is its length. lo_half and hi_half are output arrays
- that parallel "operands". */
-
-void
-split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
-{
- while (num--)
- {
- rtx op = operands[num];
-
-      /* simplify_subreg refuses to split volatile memory addresses,
-	 but we still have to handle them.  */
- if (GET_CODE (op) == MEM)
- {
- lo_half[num] = adjust_address (op, SImode, 0);
- hi_half[num] = adjust_address (op, SImode, 4);
- }
- else
- {
- lo_half[num] = simplify_gen_subreg (SImode, op,
- GET_MODE (op) == VOIDmode
- ? DImode : GET_MODE (op), 0);
- hi_half[num] = simplify_gen_subreg (SImode, op,
- GET_MODE (op) == VOIDmode
- ? DImode : GET_MODE (op), 4);
- }
- }
-}
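-
-/* For example, a DImode (reg A) splits into SImode subregs at byte
-   offsets 0 and 4; the target being little-endian, offset 0 holds the
-   low half and offset 4 the high half.  */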
-
-/* Split one or more TImode RTL references into pairs of DImode
-   references.  The RTL can be REG, offsettable MEM, integer constant, or
-   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
-   split and "num" is its length.  lo_half and hi_half are output arrays
-   that parallel "operands".  */
-
-void
-split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
-{
- while (num--)
- {
- rtx op = operands[num];
-
-      /* simplify_subreg refuses to split volatile memory addresses, but we
-	 still have to handle them.  */
- if (GET_CODE (op) == MEM)
- {
- lo_half[num] = adjust_address (op, DImode, 0);
- hi_half[num] = adjust_address (op, DImode, 8);
- }
- else
- {
- lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
- hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
- }
- }
-}
-
-/* Output code to perform a 387 binary operation in INSN, one of PLUS,
- MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
- is the expression of the binary operation. The output may either be
- emitted here, or returned to the caller, like all output_* functions.
-
- There is no guarantee that the operands are the same mode, as they
- might be within FLOAT or FLOAT_EXTEND expressions. */
-
-#ifndef SYSV386_COMPAT
-/* Set to 1 for compatibility with brain-damaged assemblers. No-one
- wants to fix the assemblers because that causes incompatibility
- with gcc. No-one wants to fix gcc because that causes
- incompatibility with assemblers... You can use the option of
- -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
-#define SYSV386_COMPAT 1
-#endif
-
-const char *
-output_387_binary_op (rtx insn, rtx *operands)
-{
- static char buf[30];
- const char *p;
- const char *ssep;
- int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
-
-#ifdef ENABLE_CHECKING
-  /* Even if we do not want to check the inputs, this documents the input
-     constraints, which helps in understanding the following code.  */
- if (STACK_REG_P (operands[0])
- && ((REG_P (operands[1])
- && REGNO (operands[0]) == REGNO (operands[1])
- && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
- || (REG_P (operands[2])
- && REGNO (operands[0]) == REGNO (operands[2])
- && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
- && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
- ; /* ok */
- else
- gcc_assert (is_sse);
-#endif
-
- switch (GET_CODE (operands[3]))
- {
- case PLUS:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- p = "fiadd";
- else
- p = "fadd";
- ssep = "add";
- break;
-
- case MINUS:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- p = "fisub";
- else
- p = "fsub";
- ssep = "sub";
- break;
-
- case MULT:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- p = "fimul";
- else
- p = "fmul";
- ssep = "mul";
- break;
-
- case DIV:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- p = "fidiv";
- else
- p = "fdiv";
- ssep = "div";
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (is_sse)
- {
- strcpy (buf, ssep);
- if (GET_MODE (operands[0]) == SFmode)
- strcat (buf, "ss\t{%2, %0|%0, %2}");
- else
- strcat (buf, "sd\t{%2, %0|%0, %2}");
- return buf;
- }
- strcpy (buf, p);
-
- switch (GET_CODE (operands[3]))
- {
- case MULT:
- case PLUS:
- if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
- {
- rtx temp = operands[2];
- operands[2] = operands[1];
- operands[1] = temp;
- }
-
-      /* We now know that operands[0] == operands[1].  */
-
- if (GET_CODE (operands[2]) == MEM)
- {
- p = "%z2\t%2";
- break;
- }
-
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
- {
- if (STACK_TOP_P (operands[0]))
- /* How is it that we are storing to a dead operand[2]?
- Well, presumably operands[1] is dead too. We can't
- store the result to st(0) as st(0) gets popped on this
- instruction. Instead store to operands[2] (which I
- think has to be st(1)). st(1) will be popped later.
- gcc <= 2.8.1 didn't have this check and generated
- assembly code that the Unixware assembler rejected. */
- p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
- else
- p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
- break;
- }
-
- if (STACK_TOP_P (operands[0]))
- p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
- else
- p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
- break;
-
- case MINUS:
- case DIV:
- if (GET_CODE (operands[1]) == MEM)
- {
- p = "r%z1\t%1";
- break;
- }
-
- if (GET_CODE (operands[2]) == MEM)
- {
- p = "%z2\t%2";
- break;
- }
-
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
- {
-#if SYSV386_COMPAT
- /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
- derived assemblers, confusingly reverse the direction of
- the operation for fsub{r} and fdiv{r} when the
- destination register is not st(0). The Intel assembler
- doesn't have this brain damage. Read !SYSV386_COMPAT to
- figure out what the hardware really does. */
- if (STACK_TOP_P (operands[0]))
- p = "{p\t%0, %2|rp\t%2, %0}";
- else
- p = "{rp\t%2, %0|p\t%0, %2}";
-#else
- if (STACK_TOP_P (operands[0]))
- /* As above for fmul/fadd, we can't store to st(0). */
- p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
- else
- p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
-#endif
- break;
- }
-
- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- {
-#if SYSV386_COMPAT
- if (STACK_TOP_P (operands[0]))
- p = "{rp\t%0, %1|p\t%1, %0}";
- else
- p = "{p\t%1, %0|rp\t%0, %1}";
-#else
- if (STACK_TOP_P (operands[0]))
- p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
- else
- p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
-#endif
- break;
- }
-
- if (STACK_TOP_P (operands[0]))
- {
- if (STACK_TOP_P (operands[1]))
- p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
- else
- p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
- break;
- }
- else if (STACK_TOP_P (operands[1]))
- {
-#if SYSV386_COMPAT
- p = "{\t%1, %0|r\t%0, %1}";
-#else
- p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
-#endif
- }
- else
- {
-#if SYSV386_COMPAT
- p = "{r\t%2, %0|\t%0, %2}";
-#else
- p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
-#endif
- }
- break;
-
- default:
- gcc_unreachable ();
- }
-
- strcat (buf, p);
- return buf;
-}
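-
-/* In the templates above, "{att|intel}" selects the operand order per
-   assembler dialect: an SSE SFmode addition returns
-   "addss\t{%2, %0|%0, %2}", i.e. "addss %2, %0" in AT&T syntax and
-   "addss %0, %2" in Intel syntax.  */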
-
-/* Return needed mode for entity in optimize_mode_switching pass. */
-
-int
-ix86_mode_needed (int entity, rtx insn)
-{
- enum attr_i387_cw mode;
-
-  /* The mode UNINITIALIZED is used to store the control word after a
-     function call or ASM pattern.  The mode ANY specifies that the
-     insn has no requirements on the control word and makes no changes
-     in the bits we are interested in.  */
-
- if (CALL_P (insn)
- || (NONJUMP_INSN_P (insn)
- && (asm_noperands (PATTERN (insn)) >= 0
- || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
- return I387_CW_UNINITIALIZED;
-
- if (recog_memoized (insn) < 0)
- return I387_CW_ANY;
-
- mode = get_attr_i387_cw (insn);
-
- switch (entity)
- {
- case I387_TRUNC:
- if (mode == I387_CW_TRUNC)
- return mode;
- break;
-
- case I387_FLOOR:
- if (mode == I387_CW_FLOOR)
- return mode;
- break;
-
- case I387_CEIL:
- if (mode == I387_CW_CEIL)
- return mode;
- break;
-
- case I387_MASK_PM:
- if (mode == I387_CW_MASK_PM)
- return mode;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- return I387_CW_ANY;
-}
-
-/* Output code to initialize the control word copies used by the
-   trunc?f?i and rounding patterns.  MODE selects which rounding-mode
-   (or precision-mask) variant of the control word to set up.  */
-
-void
-emit_i387_cw_initialization (int mode)
-{
- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
- rtx new_mode;
-
- int slot;
-
- rtx reg = gen_reg_rtx (HImode);
-
- emit_insn (gen_x86_fnstcw_1 (stored_mode));
- emit_move_insn (reg, stored_mode);
-
- if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
- {
- switch (mode)
- {
- case I387_CW_TRUNC:
- /* round toward zero (truncate) */
- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
- slot = SLOT_CW_TRUNC;
- break;
-
- case I387_CW_FLOOR:
- /* round down toward -oo */
- emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
- slot = SLOT_CW_FLOOR;
- break;
-
- case I387_CW_CEIL:
- /* round up toward +oo */
- emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
- slot = SLOT_CW_CEIL;
- break;
-
- case I387_CW_MASK_PM:
- /* mask precision exception for nearbyint() */
- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
- slot = SLOT_CW_MASK_PM;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
- else
- {
- switch (mode)
- {
- case I387_CW_TRUNC:
- /* round toward zero (truncate) */
- emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
- slot = SLOT_CW_TRUNC;
- break;
-
- case I387_CW_FLOOR:
- /* round down toward -oo */
- emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
- slot = SLOT_CW_FLOOR;
- break;
-
- case I387_CW_CEIL:
- /* round up toward +oo */
- emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
- slot = SLOT_CW_CEIL;
- break;
-
- case I387_CW_MASK_PM:
- /* mask precision exception for nearbyint() */
- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
- slot = SLOT_CW_MASK_PM;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
-
- gcc_assert (slot < MAX_386_STACK_LOCALS);
-
- new_mode = assign_386_stack_local (HImode, slot);
- emit_move_insn (new_mode, reg);
-}
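-
-/* The magic numbers above follow the x87 control word layout: bits
-   10-11 are the rounding control (00 nearest, 01 down, 10 up, 11
-   truncate), hence the 0x0c00 mask and the 0x0400/0x0800/0x0c00
-   values, and bit 5 (0x0020) is the precision-exception mask used
-   for nearbyint.  */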
-
-/* Output code for INSN to convert a float to a signed int. OPERANDS
- are the insn operands. The output may be [HSD]Imode and the input
- operand may be [SDX]Fmode. */
-
-const char *
-output_fix_trunc (rtx insn, rtx *operands, int fisttp)
-{
- int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
- int dimode_p = GET_MODE (operands[0]) == DImode;
- int round_mode = get_attr_i387_cw (insn);
-
- /* Jump through a hoop or two for DImode, since the hardware has no
- non-popping instruction. We used to do this a different way, but
- that was somewhat fragile and broke with post-reload splitters. */
- if ((dimode_p || fisttp) && !stack_top_dies)
- output_asm_insn ("fld\t%y1", operands);
-
- gcc_assert (STACK_TOP_P (operands[1]));
- gcc_assert (GET_CODE (operands[0]) == MEM);
-
- if (fisttp)
- output_asm_insn ("fisttp%z0\t%0", operands);
- else
- {
- if (round_mode != I387_CW_ANY)
- output_asm_insn ("fldcw\t%3", operands);
- if (stack_top_dies || dimode_p)
- output_asm_insn ("fistp%z0\t%0", operands);
- else
- output_asm_insn ("fist%z0\t%0", operands);
- if (round_mode != I387_CW_ANY)
- output_asm_insn ("fldcw\t%2", operands);
- }
-
- return "";
-}
-
-/* Output code for x87 ffreep insn. The OPNO argument, which may only
- have the values zero or one, indicates the ffreep insn's operand
- from the OPERANDS array. */
-
-static const char *
-output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
-{
- if (TARGET_USE_FFREEP)
-#if HAVE_AS_IX86_FFREEP
- return opno ? "ffreep\t%y1" : "ffreep\t%y0";
-#else
- switch (REGNO (operands[opno]))
- {
- case FIRST_STACK_REG + 0: return ".word\t0xc0df";
- case FIRST_STACK_REG + 1: return ".word\t0xc1df";
- case FIRST_STACK_REG + 2: return ".word\t0xc2df";
- case FIRST_STACK_REG + 3: return ".word\t0xc3df";
- case FIRST_STACK_REG + 4: return ".word\t0xc4df";
- case FIRST_STACK_REG + 5: return ".word\t0xc5df";
- case FIRST_STACK_REG + 6: return ".word\t0xc6df";
- case FIRST_STACK_REG + 7: return ".word\t0xc7df";
- }
-#endif
-
- return opno ? "fstp\t%y1" : "fstp\t%y0";
-}
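-
-/* The .word constants above hand-encode "ffreep %st(i)", whose
-   encoding is df c0+i; the bytes look swapped because .word emits
-   them in little-endian order.  */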
-
-
-/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
- should be used. UNORDERED_P is true when fucom should be used. */
-
-const char *
-output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
-{
- int stack_top_dies;
- rtx cmp_op0, cmp_op1;
- int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
-
- if (eflags_p)
- {
- cmp_op0 = operands[0];
- cmp_op1 = operands[1];
- }
- else
- {
- cmp_op0 = operands[1];
- cmp_op1 = operands[2];
- }
-
- if (is_sse)
- {
- if (GET_MODE (operands[0]) == SFmode)
- if (unordered_p)
- return "ucomiss\t{%1, %0|%0, %1}";
- else
- return "comiss\t{%1, %0|%0, %1}";
- else
- if (unordered_p)
- return "ucomisd\t{%1, %0|%0, %1}";
- else
- return "comisd\t{%1, %0|%0, %1}";
- }
-
- gcc_assert (STACK_TOP_P (cmp_op0));
-
- stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
-
- if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
- {
- if (stack_top_dies)
- {
- output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
- return output_387_ffreep (operands, 1);
- }
- else
- return "ftst\n\tfnstsw\t%0";
- }
-
- if (STACK_REG_P (cmp_op1)
- && stack_top_dies
- && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
- && REGNO (cmp_op1) != FIRST_STACK_REG)
- {
-      /* If both the top of the 387 stack and the other operand (also a
-	 stack register) die, then this must be a `fcompp' float
-	 compare.  */
-
- if (eflags_p)
- {
-	  /* There is no double-popping fcomi variant.  Fortunately,
-	     eflags is immune to the fstp's cc clobbering.  */
- if (unordered_p)
- output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
- else
- output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
- return output_387_ffreep (operands, 0);
- }
- else
- {
- if (unordered_p)
- return "fucompp\n\tfnstsw\t%0";
- else
- return "fcompp\n\tfnstsw\t%0";
- }
- }
- else
- {
- /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
-
- static const char * const alt[16] =
- {
- "fcom%z2\t%y2\n\tfnstsw\t%0",
- "fcomp%z2\t%y2\n\tfnstsw\t%0",
- "fucom%z2\t%y2\n\tfnstsw\t%0",
- "fucomp%z2\t%y2\n\tfnstsw\t%0",
-
- "ficom%z2\t%y2\n\tfnstsw\t%0",
- "ficomp%z2\t%y2\n\tfnstsw\t%0",
- NULL,
- NULL,
-
- "fcomi\t{%y1, %0|%0, %y1}",
- "fcomip\t{%y1, %0|%0, %y1}",
- "fucomi\t{%y1, %0|%0, %y1}",
- "fucomip\t{%y1, %0|%0, %y1}",
-
- NULL,
- NULL,
- NULL,
- NULL
- };
-
- int mask;
- const char *ret;
-
- mask = eflags_p << 3;
- mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
- mask |= unordered_p << 1;
- mask |= stack_top_dies;
-
- gcc_assert (mask < 16);
- ret = alt[mask];
- gcc_assert (ret);
-
- return ret;
- }
-}
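-
-/* For example, an ordered compare with eflags output whose top-of-stack
-   operand dies encodes as mask 8|0|0|1 = 9, selecting the "fcomip"
-   alternative in the table above.  */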
-
-void
-ix86_output_addr_vec_elt (FILE *file, int value)
-{
- const char *directive = ASM_LONG;
-
-#ifdef ASM_QUAD
- if (TARGET_64BIT)
- directive = ASM_QUAD;
-#else
- gcc_assert (!TARGET_64BIT);
-#endif
-
- fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
-}
-
-void
-ix86_output_addr_diff_elt (FILE *file, int value, int rel)
-{
- if (TARGET_64BIT)
- fprintf (file, "%s%s%d-%s%d\n",
- ASM_LONG, LPREFIX, value, LPREFIX, rel);
- else if (HAVE_AS_GOTOFF_IN_DATA)
- fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
-#if TARGET_MACHO
- else if (TARGET_MACHO)
- {
- fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
- machopic_output_function_base_name (file);
- fprintf(file, "\n");
- }
-#endif
- else
- asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
- ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
-}
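-
-/* Sample 32-bit PIC output, assuming LPREFIX is the usual ".L": with
-   @GOTOFF-in-data support this emits ".long .L3@GOTOFF"; without it,
-   the _GLOBAL_OFFSET_TABLE_+[.-.L3] form is used instead.  */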
-
-/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
- for the target. */
-
-void
-ix86_expand_clear (rtx dest)
-{
- rtx tmp;
-
- /* We play register width games, which are only valid after reload. */
- gcc_assert (reload_completed);
-
- /* Avoid HImode and its attendant prefix byte. */
- if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
- dest = gen_rtx_REG (SImode, REGNO (dest));
-
- tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
-
- /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
- if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
- {
- rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
- }
-
- emit_insn (tmp);
-}
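-
-/* The explicit CLOBBER is needed because "xor reg, reg", unlike
-   "mov $0, reg", changes the flags; hard register 17 is FLAGS_REG
-   in this port.  */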
-
-/* X is an unchanging MEM. If it is a constant pool reference, return
- the constant pool rtx, else NULL. */
-
-rtx
-maybe_get_pool_constant (rtx x)
-{
- x = ix86_delegitimize_address (XEXP (x, 0));
-
- if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
- return get_pool_constant (x);
-
- return NULL_RTX;
-}
-
-void
-ix86_expand_move (enum machine_mode mode, rtx operands[])
-{
- int strict = (reload_in_progress || reload_completed);
- /* APPLE LOCAL dynamic-no-pic */
- rtx insn, op0, op1;
- enum tls_model model;
-
- op0 = operands[0];
- op1 = operands[1];
-
- if (GET_CODE (op1) == SYMBOL_REF)
- {
- model = SYMBOL_REF_TLS_MODEL (op1);
- if (model)
- {
- op1 = legitimize_tls_address (op1, model, true);
- op1 = force_operand (op1, op0);
- if (op1 == op0)
- return;
- }
- }
- else if (GET_CODE (op1) == CONST
- && GET_CODE (XEXP (op1, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
- {
- model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
- if (model)
- {
- rtx addend = XEXP (XEXP (op1, 0), 1);
- op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
- op1 = force_operand (op1, NULL);
- op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
- op0, 1, OPTAB_DIRECT);
- if (op1 == op0)
- return;
- }
- }
-
- /* APPLE LOCAL begin dynamic-no-pic */
- /* allow macho & macho for x86_64 to coexist */
- if (((TARGET_MACHO && MACHOPIC_INDIRECT)
- || flag_pic)
- && mode == Pmode && symbolic_operand (op1, Pmode))
- /* APPLE LOCAL end dynamic-no-pic */
- {
- if (TARGET_MACHO && !TARGET_64BIT)
- {
-#if TARGET_MACHO
- /* APPLE LOCAL begin dynamic-no-pic */
- if (MACHOPIC_INDIRECT)
- {
- rtx temp = ((reload_in_progress
- || ((op0 && GET_CODE (op0) == REG)
- && mode == Pmode))
- ? op0 : gen_reg_rtx (Pmode));
- op1 = machopic_indirect_data_reference (op1, temp);
- if (MACHOPIC_PURE)
- op1 = machopic_legitimize_pic_address (op1, mode,
- temp == op1 ? 0 : temp);
- }
- if (op0 != op1 && GET_CODE (op0) != MEM)
- {
- insn = gen_rtx_SET (VOIDmode, op0, op1);
- emit_insn (insn);
- return;
- }
- if (GET_CODE (op0) == MEM)
- op1 = force_reg (Pmode, op1);
- else
- {
- rtx temp = op0;
- if (GET_CODE (temp) != REG)
- temp = gen_reg_rtx (Pmode);
- temp = legitimize_pic_address (op1, temp);
- if (temp == op0)
- return;
- op1 = temp;
- }
- /* APPLE LOCAL end dynamic-no-pic */
-#endif
- }
- else
- {
- if (GET_CODE (op0) == MEM)
- op1 = force_reg (Pmode, op1);
- else
- op1 = legitimize_address (op1, op1, Pmode);
- }
- }
- else
- {
- if (GET_CODE (op0) == MEM
- && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
- || !push_operand (op0, mode))
- && GET_CODE (op1) == MEM)
- op1 = force_reg (mode, op1);
-
- if (push_operand (op0, mode)
- && ! general_no_elim_operand (op1, mode))
- op1 = copy_to_mode_reg (mode, op1);
-
-      /* Force large constants in 64-bit compilation into a register
-	 to get them CSEd.  */
- if (TARGET_64BIT && mode == DImode
- && immediate_operand (op1, mode)
- && !x86_64_zext_immediate_operand (op1, VOIDmode)
- && !register_operand (op0, mode)
- && optimize && !reload_completed && !reload_in_progress)
- op1 = copy_to_mode_reg (mode, op1);
-
- if (FLOAT_MODE_P (mode))
- {
-	  /* If we are loading a floating point constant to a register,
-	     force the value to memory now, since we'll get better code
-	     out of the back end.  */
-
- if (strict)
- ;
- else if (GET_CODE (op1) == CONST_DOUBLE)
- {
- op1 = validize_mem (force_const_mem (mode, op1));
- if (!register_operand (op0, mode))
- {
- rtx temp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
- emit_move_insn (op0, temp);
- return;
- }
- }
- }
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
-}
-
-void
-ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
-{
- rtx op0 = operands[0], op1 = operands[1];
- /* APPLE LOCAL begin radar 4614623 */
- cfun->uses_vector = 1;
- /* APPLE LOCAL end radar 4614623 */
-
-  /* Force constants other than zero into memory.  We do not know how
-     the instructions used to build constants modify the upper 64 bits
-     of the register; once we have that information we may be able
-     to handle some of them more efficiently.  */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (op0, mode)
- && CONSTANT_P (op1)
- && standard_sse_constant_p (op1) <= 0)
- op1 = validize_mem (force_const_mem (mode, op1));
-
- /* Make operand1 a register if it isn't already. */
- if (!no_new_pseudos
- && !register_operand (op0, mode)
- && !register_operand (op1, mode))
- {
- emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
- return;
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
-}
-
-/* Implement the movmisalign patterns for SSE. Non-SSE modes go
- straight to ix86_expand_vector_move. */
-
-void
-ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
-{
- rtx op0, op1, m;
-
- op0 = operands[0];
- op1 = operands[1];
-
- if (MEM_P (op1))
- {
- /* If we're optimizing for size, movups is the smallest. */
- if (optimize_size)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- return;
- }
-
- /* ??? If we have typed data, then it would appear that using
- movdqu is the only way to get unaligned data loaded with
- integer type. */
- if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
- {
- op0 = gen_lowpart (V16QImode, op0);
- op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
- return;
- }
-
- if (TARGET_SSE2 && mode == V2DFmode)
- {
- rtx zero;
-
- /* When SSE registers are split into halves, we can avoid
- writing to the top half twice. */
- if (TARGET_SSE_SPLIT_REGS)
- {
- emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
- zero = op0;
- }
- else
- {
- /* ??? Not sure about the best option for the Intel chips.
- The following would seem to satisfy; the register is
- entirely cleared, breaking the dependency chain. We
- then store to the upper half, with a dependency depth
- of one. A rumor has it that Intel recommends two movsd
- followed by an unpacklpd, but this is unconfirmed. And
- given that the dependency depth of the unpacklpd would
- still be one, I'm not sure why this would be better. */
- zero = CONST0_RTX (V2DFmode);
- }
-
- m = adjust_address (op1, DFmode, 0);
- emit_insn (gen_sse2_loadlpd (op0, zero, m));
- m = adjust_address (op1, DFmode, 8);
- emit_insn (gen_sse2_loadhpd (op0, op0, m));
- }
- else
- {
- if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
- emit_move_insn (op0, CONST0_RTX (mode));
- else
- emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
-
- if (mode != V4SFmode)
- op0 = gen_lowpart (V4SFmode, op0);
- m = adjust_address (op1, V2SFmode, 0);
- emit_insn (gen_sse_loadlps (op0, op0, m));
- m = adjust_address (op1, V2SFmode, 8);
- emit_insn (gen_sse_loadhps (op0, op0, m));
- }
- }
- else if (MEM_P (op0))
- {
- /* If we're optimizing for size, movups is the smallest. */
- if (optimize_size)
- {
- op0 = gen_lowpart (V4SFmode, op0);
- op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
- return;
- }
-
-      /* ??? Similar to above, only less clear because of
-	 "typeless stores".  */
- if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
- && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
- {
- op0 = gen_lowpart (V16QImode, op0);
- op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
- return;
- }
-
- if (TARGET_SSE2 && mode == V2DFmode)
- {
- m = adjust_address (op0, DFmode, 0);
- emit_insn (gen_sse2_storelpd (m, op1));
- m = adjust_address (op0, DFmode, 8);
- emit_insn (gen_sse2_storehpd (m, op1));
- }
- else
- {
- if (mode != V4SFmode)
- op1 = gen_lowpart (V4SFmode, op1);
- m = adjust_address (op0, V2SFmode, 0);
- emit_insn (gen_sse_storelps (m, op1));
- m = adjust_address (op0, V2SFmode, 8);
- emit_insn (gen_sse_storehps (m, op1));
- }
- }
- else
- gcc_unreachable ();
-}
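-
-/* In effect, the misaligned V2DF load above amounts to (roughly, in
-   AT&T syntax; a sketch rather than the exact scheduling):
-
-	xorpd	%xmm0, %xmm0	# clear, breaking the dependency chain
-	movlpd	mem, %xmm0	# load the low double
-	movhpd	mem+8, %xmm0	# load the high double
-
-   versus a single movupd when optimizing for size; the V4SF path uses
-   the analogous movlps/movhps pair.  */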
-
-/* Expand a push in MODE. This is some mode for which we do not support
- proper push instructions, at least from the registers that we expect
- the value to live in. */
-
-void
-ix86_expand_push (enum machine_mode mode, rtx x)
-{
- rtx tmp;
-
- tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
- GEN_INT (-GET_MODE_SIZE (mode)),
- stack_pointer_rtx, 1, OPTAB_DIRECT);
- if (tmp != stack_pointer_rtx)
- emit_move_insn (stack_pointer_rtx, tmp);
-
- tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
- emit_move_insn (tmp, x);
-}
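-
-/* In effect this expands "push x" into an explicit stack-pointer
-   update plus a store; e.g. for a DFmode value living in an SSE
-   register on x86_32, roughly
-
-	subl	$8, %esp
-	movsd	%xmm0, (%esp)
-
-   (a sketch; the stack-pointer add may later be combined with
-   neighboring adjustments).  */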
-
-/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
- destination to use for the operation. If different from the true
- destination in operands[0], a copy operation will be required. */
-
-rtx
-ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
- rtx operands[])
-{
- int matching_memory;
- rtx src1, src2, dst;
-
- dst = operands[0];
- src1 = operands[1];
- src2 = operands[2];
-
- /* Recognize <var1> = <value> <op> <var1> for commutative operators */
- if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
- && (rtx_equal_p (dst, src2)
- || immediate_operand (src1, mode)))
- {
- rtx temp = src1;
- src1 = src2;
- src2 = temp;
- }
-
- /* If the destination is memory, and we do not have matching source
- operands, do things in registers. */
- matching_memory = 0;
- if (GET_CODE (dst) == MEM)
- {
- if (rtx_equal_p (dst, src1))
- matching_memory = 1;
- else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
- && rtx_equal_p (dst, src2))
- matching_memory = 2;
- else
- dst = gen_reg_rtx (mode);
- }
-
- /* Both source operands cannot be in memory. */
- if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
- {
- if (matching_memory != 2)
- src2 = force_reg (mode, src2);
- else
- src1 = force_reg (mode, src1);
- }
-
-  /* If the operation is not commutative, source 1 cannot be a constant
- or non-matching memory. */
- if ((CONSTANT_P (src1)
- || (!matching_memory && GET_CODE (src1) == MEM))
- && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
- src1 = force_reg (mode, src1);
-
-  operands[1] = src1;
-  operands[2] = src2;
- return dst;
-}
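-
-/* For instance, for the commutative PLUS in "a = 5 + a" the swap above
-   moves the constant to src2, yielding the "dst = src1 op src2" shape
-   with dst matching src1 that the insn patterns want (an illustrative
-   case, not an exhaustive list of the fixups).  */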
-
-/* Similarly, but assume that the destination has already been
- set up properly. */
-
-void
-ix86_fixup_binary_operands_no_copy (enum rtx_code code,
- enum machine_mode mode, rtx operands[])
-{
- rtx dst = ix86_fixup_binary_operands (code, mode, operands);
- gcc_assert (dst == operands[0]);
-}
-
-/* Attempt to expand a binary operator.  Make the expansion closer to the
-   actual machine than just general_operand, which would allow 3 separate
-   memory references (one output, two input) in a single insn.  */
-
-void
-ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
- rtx operands[])
-{
- rtx src1, src2, dst, op, clob;
-
- dst = ix86_fixup_binary_operands (code, mode, operands);
- src1 = operands[1];
- src2 = operands[2];
-
- /* Emit the instruction. */
-
- op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
- if (reload_in_progress)
- {
- /* Reload doesn't know about the flags register, and doesn't know that
- it doesn't want to clobber it. We can only do this with PLUS. */
- gcc_assert (code == PLUS);
- emit_insn (op);
- }
- else
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
- }
-
- /* Fix up the destination if needed. */
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
-}
-
-/* Return TRUE or FALSE depending on whether the binary operator meets the
- appropriate constraints. */
-
-int
-ix86_binary_operator_ok (enum rtx_code code,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- rtx operands[3])
-{
- /* Both source operands cannot be in memory. */
- if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
- return 0;
-  /* If the operation is not commutative, source 1 cannot be a constant.  */
- if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
- return 0;
- /* If the destination is memory, we must have a matching source operand. */
- if (GET_CODE (operands[0]) == MEM
- && ! (rtx_equal_p (operands[0], operands[1])
- || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
- && rtx_equal_p (operands[0], operands[2]))))
- return 0;
-  /* If the operation is not commutative and source 1 is memory, we must
- have a matching destination. */
- if (GET_CODE (operands[1]) == MEM
- && GET_RTX_CLASS (code) != RTX_COMM_ARITH
- && ! rtx_equal_p (operands[0], operands[1]))
- return 0;
- return 1;
-}
-
-/* Attempt to expand a unary operator.  Make the expansion closer to the
-   actual machine than just general_operand, which would allow 2 separate
-   memory references (one output, one input) in a single insn.  */
-
-void
-ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
- rtx operands[])
-{
- int matching_memory;
- rtx src, dst, op, clob;
-
- dst = operands[0];
- src = operands[1];
-
- /* If the destination is memory, and we do not have matching source
- operands, do things in registers. */
- matching_memory = 0;
- if (MEM_P (dst))
- {
- if (rtx_equal_p (dst, src))
- matching_memory = 1;
- else
- dst = gen_reg_rtx (mode);
- }
-
- /* When source operand is memory, destination must match. */
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
-
- /* Emit the instruction. */
-
- op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
- if (reload_in_progress || code == NOT)
- {
- /* Reload doesn't know about the flags register, and doesn't know that
- it doesn't want to clobber it. */
- gcc_assert (code == NOT);
- emit_insn (op);
- }
- else
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
- }
-
- /* Fix up the destination if needed. */
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
-}
-
-/* Return TRUE or FALSE depending on whether the unary operator meets the
- appropriate constraints. */
-
-int
-ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- rtx operands[2] ATTRIBUTE_UNUSED)
-{
- /* If one of operands is memory, source and destination must match. */
- if ((GET_CODE (operands[0]) == MEM
- || GET_CODE (operands[1]) == MEM)
- && ! rtx_equal_p (operands[0], operands[1]))
- return FALSE;
- return TRUE;
-}
-
-/* APPLE LOCAL begin 4176531 4424891 */
-static void
-ix86_expand_vector_move2 (enum machine_mode mode, rtx op0, rtx op1)
-{
- rtx operands[2];
- operands[0] = op0;
- operands[1] = op1;
- ix86_expand_vector_move (mode, operands);
-}
-
-static rtvec
-gen_2_4_rtvec (int scalars_per_vector, rtx val, enum machine_mode mode)
-{
- rtvec rval;
- switch (scalars_per_vector)
- {
- case 2: rval = gen_rtvec (2, val, CONST0_RTX (mode));
- break;
- case 4: rval = gen_rtvec (4, val, CONST0_RTX (mode),
- CONST0_RTX (mode), CONST0_RTX (mode));
- break;
- default: abort ();
- }
- return rval;
-}
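-
-/* E.g. gen_2_4_rtvec (2, x, DFmode) builds the vector { x, 0.0 } and
-   gen_2_4_rtvec (4, x, SFmode) builds { x, 0.0, 0.0, 0.0 }; only the
-   low element carries a payload.  */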
-
-/* Convert a DFmode value in an SSE register into an unsigned SImode.
- When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
-   conversion, then ignoring the upper 32 bits of the result.  On
- x86_64, there is an equivalent SSE %xmm->signed-int-64 conversion.
- On x86_32, we don't have the instruction, nor the 64-bit
- destination register it requires. Do the conversion inline in the
- SSE registers. Requires SSE2. For x86_32, -mfpmath=sse,
- !optimize_size only. */
-const char *
-ix86_expand_convert_uns_DF2SI_sse (rtx operands[])
-{
- rtx int_zero_as_fp, int_maxval_as_fp, int_two31_as_fp;
- REAL_VALUE_TYPE rvt_zero, rvt_int_maxval, rvt_int_two31;
- rtx int_zero_as_xmm, int_maxval_as_xmm;
- rtx fp_value = operands[1];
- rtx target = operands[0];
- rtx large_xmm;
- rtx large_xmm_v2di;
- rtx le_op;
- rtx zero_or_two31_xmm;
- rtx final_result_rtx;
- rtx v_rtx;
- rtx incoming_value;
-
- cfun->uses_vector = 1;
-
- real_from_integer (&rvt_zero, DFmode, 0ULL, 0ULL, 1);
- int_zero_as_fp = const_double_from_real_value (rvt_zero, DFmode);
-
- real_from_integer (&rvt_int_maxval, DFmode, 0xffffffffULL, 0ULL, 1);
- int_maxval_as_fp = const_double_from_real_value (rvt_int_maxval, DFmode);
-
- real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1);
- int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode);
-
- incoming_value = force_reg (GET_MODE (operands[1]), operands[1]);
-
- gcc_assert (ix86_preferred_stack_boundary >= 128);
-
- fp_value = gen_reg_rtx (V2DFmode);
- ix86_expand_vector_move2 (V2DFmode, fp_value,
- gen_rtx_SUBREG (V2DFmode, incoming_value, 0));
- large_xmm = gen_reg_rtx (V2DFmode);
-
- v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
- gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
- ix86_expand_vector_move2 (DFmode, large_xmm, v_rtx);
- le_op = gen_rtx_fmt_ee (LE, V2DFmode,
- gen_rtx_SUBREG (V2DFmode, fp_value, 0), large_xmm);
- /* large_xmm = (fp_value >= 2**31) ? -1 : 0 ; */
- emit_insn (gen_sse2_vmmaskcmpv2df3 (large_xmm, large_xmm, fp_value, le_op));
-
- int_maxval_as_xmm = gen_reg_rtx (V2DFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
- gen_2_4_rtvec (2, int_maxval_as_fp, DFmode));
- ix86_expand_vector_move2 (DFmode, int_maxval_as_xmm, v_rtx);
-
- emit_insn (gen_sse2_vmsminv2df3 (fp_value, fp_value, int_maxval_as_xmm));
-
- int_zero_as_xmm = gen_reg_rtx (V2DFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
- gen_2_4_rtvec (2, int_zero_as_fp, DFmode));
-
- ix86_expand_vector_move2 (DFmode, int_zero_as_xmm, v_rtx);
-
- emit_insn (gen_sse2_vmsmaxv2df3 (fp_value, fp_value, int_zero_as_xmm));
-
- zero_or_two31_xmm = gen_reg_rtx (V2DFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
- gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
- ix86_expand_vector_move2 (DFmode, zero_or_two31_xmm, v_rtx);
-
- /* zero_or_two31 = (large_xmm) ? 2**31 : 0; */
- emit_insn (gen_andv2df3 (zero_or_two31_xmm, zero_or_two31_xmm, large_xmm));
- /* if (large_xmm) fp_value -= 2**31; */
- emit_insn (gen_subv2df3 (fp_value, fp_value, zero_or_two31_xmm));
- /* assert (0 <= fp_value && fp_value < 2**31);
- int_result = trunc (fp_value); */
- final_result_rtx = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_cvttpd2dq (final_result_rtx, fp_value));
-
- large_xmm_v2di = gen_reg_rtx (V2DImode);
- emit_move_insn (large_xmm_v2di, gen_rtx_SUBREG (V2DImode, large_xmm, 0));
- emit_insn (gen_ashlv2di3 (large_xmm_v2di, large_xmm_v2di,
- gen_rtx_CONST_INT (SImode, 31)));
-
- emit_insn (gen_xorv4si3 (final_result_rtx, final_result_rtx,
- gen_rtx_SUBREG (V4SImode, large_xmm_v2di, 0)));
- if (!rtx_equal_p (target, final_result_rtx))
- emit_insn (gen_sse2_stored (target, final_result_rtx));
- return "";
-}
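-
-/* Scalar sketch of the sequence above, using illustrative names:
-
-	big = (x >= 2**31);			(large_xmm)
-	x = min (x, 4294967295.0);		(minsd)
-	x = max (x, 0.0);			(maxsd)
-	if (big) x -= 2**31;
-	r = (unsigned) trunc (x);		(cvttpd2dq)
-	if (big) r ^= 0x80000000;		(shift and xor)
-
-   The xor is safe because the truncated value is below 2**31.  */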
-
-/* Convert an SFmode value in an SSE register into an unsigned SImode.
- When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
- conversion, and subsequently ignoring the upper 32 bits of the
- result. On x86_64, there is an equivalent SSE %xmm->signed-int-64
- conversion. On x86_32, we don't have the instruction, nor the
- 64-bit destination register it requires. Do the conversion inline
- in the SSE registers. Requires SSE2. For x86_32, -mfpmath=sse,
- !optimize_size only. */
-const char *
-ix86_expand_convert_uns_SF2SI_sse (rtx operands[])
-{
- rtx int_zero_as_fp, int_two31_as_fp, int_two32_as_fp;
- REAL_VALUE_TYPE rvt_zero, rvt_int_two31, rvt_int_two32;
- rtx int_zero_as_xmm;
- rtx fp_value = operands[1];
- rtx target = operands[0];
- rtx large_xmm;
- rtx two31_xmm, two32_xmm;
- rtx above_two31_xmm, above_two32_xmm;
- rtx zero_or_two31_SI_xmm;
- rtx le_op;
- rtx zero_or_two31_SF_xmm;
- rtx int_result_xmm;
- rtx v_rtx;
- rtx incoming_value;
-
- cfun->uses_vector = 1;
-
- real_from_integer (&rvt_zero, SFmode, 0ULL, 0ULL, 1);
- int_zero_as_fp = const_double_from_real_value (rvt_zero, SFmode);
-
- real_from_integer (&rvt_int_two31, SFmode, 0x80000000ULL, 0ULL, 1);
- int_two31_as_fp = const_double_from_real_value (rvt_int_two31, SFmode);
-
- real_from_integer (&rvt_int_two32, SFmode, (HOST_WIDE_INT)0x100000000ULL,
- 0ULL, 1);
- int_two32_as_fp = const_double_from_real_value (rvt_int_two32, SFmode);
-
- incoming_value = force_reg (GET_MODE (operands[1]), operands[1]);
-
- gcc_assert (ix86_preferred_stack_boundary >= 128);
-
- fp_value = gen_reg_rtx (V4SFmode);
- ix86_expand_vector_move2 (V4SFmode, fp_value,
- gen_rtx_SUBREG (V4SFmode, incoming_value, 0));
- large_xmm = gen_reg_rtx (V4SFmode);
-
- /* fp_value = MAX (fp_value, 0.0); */
- /* Preclude negative values; truncate at zero. */
- int_zero_as_xmm = gen_reg_rtx (V4SFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
- gen_2_4_rtvec (4, int_zero_as_fp, SFmode));
- ix86_expand_vector_move2 (SFmode, int_zero_as_xmm, v_rtx);
- emit_insn (gen_sse_vmsmaxv4sf3 (fp_value, fp_value, int_zero_as_xmm));
-
-  /* two31_xmm = 0x80000000; */
- two31_xmm = gen_reg_rtx (V4SFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
- gen_2_4_rtvec (4, int_two31_as_fp, SFmode));
- ix86_expand_vector_move2 (SFmode, two31_xmm, v_rtx);
-
-  /* zero_or_two31_SF_xmm = 0x80000000; */
- zero_or_two31_SF_xmm = gen_reg_rtx (V4SFmode);
- ix86_expand_vector_move2 (SFmode, zero_or_two31_SF_xmm, two31_xmm);
-
- /* above_two31_xmm = (fp_value >= 2**31) ? 0xffff_ffff : 0 ; */
- above_two31_xmm = gen_reg_rtx (V4SFmode);
- ix86_expand_vector_move2 (SFmode, above_two31_xmm, two31_xmm);
- le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two31_xmm,
- gen_rtx_SUBREG (V4SFmode, two31_xmm, 0));
- emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two31_xmm, above_two31_xmm,
- fp_value, le_op));
-
- /* two32_xmm = 0x1_0000_0000; */
- two32_xmm = gen_reg_rtx (V4SFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V4SFmode,
- gen_2_4_rtvec (4, int_two32_as_fp, SFmode));
- ix86_expand_vector_move2 (SFmode, two32_xmm, v_rtx);
-
- /* above_two32_xmm = (fp_value >= 2**32) ? 0xffff_ffff : 0 ; */
- above_two32_xmm = gen_reg_rtx (V4SFmode);
- ix86_expand_vector_move2 (SFmode, above_two32_xmm, two32_xmm);
- le_op = gen_rtx_fmt_ee (LE, V4SFmode, above_two32_xmm,
- gen_rtx_SUBREG (V4SFmode, two32_xmm, 0));
- emit_insn (gen_sse_vmmaskcmpv4sf3 (above_two32_xmm, above_two32_xmm,
- fp_value, le_op));
-
- /* zero_or_two31_SF_xmm = (above_two31_xmm) ? 2**31 : 0; */
- emit_insn (gen_andv4sf3 (zero_or_two31_SF_xmm, zero_or_two31_SF_xmm,
- above_two31_xmm));
-
-  /* zero_or_two31_SI_xmm = (above_two31_xmm << 31), i.e. 2**31 in the
-     lanes where the value was large and 0 elsewhere.  */
-  zero_or_two31_SI_xmm = gen_reg_rtx (V4SImode);
-  emit_move_insn (zero_or_two31_SI_xmm,
-		  gen_rtx_SUBREG (V4SImode, above_two31_xmm, 0));
-  emit_insn (gen_ashlv4si3 (zero_or_two31_SI_xmm, zero_or_two31_SI_xmm,
-			    gen_rtx_CONST_INT (SImode, 31)));
-
- /* if (above_two31_xmm) fp_value -= 2**31; */
- /* If the input FP value is greater than 2**31, subtract that amount
- from the FP value before conversion. We'll re-add that amount as
- an integer after the conversion. */
- emit_insn (gen_subv4sf3 (fp_value, fp_value, zero_or_two31_SF_xmm));
-
- /* assert (0.0 <= fp_value && fp_value < 2**31);
- int_result_xmm = trunc (fp_value); */
- /* Apply the SSE double -> signed_int32 conversion to our biased,
- clamped SF value. */
- int_result_xmm = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_cvttps2dq (int_result_xmm, fp_value));
-
-  /* int_result_xmm += zero_or_two31_SI_xmm; */
- /* Restore the 2**31 bias we may have subtracted earlier. If the
- input FP value was between 2**31 and 2**32, this will unbias the
- result.
-
- input_fp_value < 2**31: this won't change the value
- 2**31 <= input_fp_value < 2**32:
-     this will restore the 2**31 bias we subtracted earlier
- input_fp_value >= 2**32: this insn doesn't matter;
- the next insn will clobber this result
- */
- emit_insn (gen_addv4si3 (int_result_xmm, int_result_xmm,
- zero_or_two31_SI_xmm));
-
- /* int_result_xmm |= above_two32_xmm; */
- /* If the input value was greater than 2**32, force the integral
- result to 0xffff_ffff. */
- emit_insn (gen_iorv4si3 (int_result_xmm, int_result_xmm,
- gen_rtx_SUBREG (V4SImode, above_two32_xmm, 0)));
-
- if (!rtx_equal_p (target, int_result_xmm))
- emit_insn (gen_sse2_stored (target, int_result_xmm));
- return "";
-}
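-
-/* Scalar sketch of the above, using illustrative names:
-
-	x = max (x, 0.0f);
-	big31 = (x >= 2**31);
-	big32 = (x >= 2**32);
-	if (big31) x -= 2**31;
-	r = (unsigned) trunc (x);	(cvttps2dq)
-	if (big31) r += 0x80000000;	(re-add the bias)
-	if (big32) r = 0xffffffff;	(ior with the all-ones mask)  */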
-
-/* Convert an unsigned DImode value into a DFmode, using only SSE.
- Expects the 64-bit DImode to be supplied as two 32-bit parts in two
- SSE %xmm registers; result returned in an %xmm register. Requires
- SSE2; will use SSE3 if available. For x86_32, -mfpmath=sse,
- !optimize_size only. */
-const char *
-ix86_expand_convert_uns_DI2DF_sse (rtx operands[])
-{
- REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
- rtx bias_lo_rtx, bias_hi_rtx;
- rtx target = operands[0];
- rtx fp_value = operands[1];
- rtx fp_value_hi, fp_value_lo;
- rtx fp_value_hi_xmm, fp_value_lo_xmm;
- rtx int_xmm;
- rtx final_result_xmm, result_lo_xmm;
- rtx biases, exponents;
- rtvec biases_rtvec, exponents_rtvec;
-
- cfun->uses_vector = 1;
-
- gcc_assert (ix86_preferred_stack_boundary >= 128);
-
- int_xmm = gen_reg_rtx (V4SImode);
-
- fp_value = force_reg (GET_MODE (operands[1]), operands[1]);
-
- fp_value_lo = gen_rtx_SUBREG (SImode, fp_value, 0);
- fp_value_lo_xmm = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_loadld (fp_value_lo_xmm, CONST0_RTX (V4SImode),
- fp_value_lo));
-
- fp_value_hi = gen_rtx_SUBREG (SImode, fp_value, 4);
- fp_value_hi_xmm = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_loadld (fp_value_hi_xmm, CONST0_RTX (V4SImode),
- fp_value_hi));
-
- ix86_expand_vector_move2 (V4SImode, int_xmm, fp_value_hi_xmm);
- emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, fp_value_lo_xmm));
-
- exponents_rtvec = gen_rtvec (4, GEN_INT (0x45300000UL),
- GEN_INT (0x43300000UL),
- CONST0_RTX (SImode), CONST0_RTX (SImode));
- exponents = validize_mem (
- force_const_mem (V4SImode, gen_rtx_CONST_VECTOR (V4SImode,
- exponents_rtvec)));
- emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
-
- final_result_xmm = gen_reg_rtx (V2DFmode);
- ix86_expand_vector_move2 (V2DFmode, final_result_xmm,
- gen_rtx_SUBREG (V2DFmode, int_xmm, 0));
-
- /* Integral versions of the DFmode 'exponents' above. */
- REAL_VALUE_FROM_INT (bias_lo_rvt, 0x00000000000000ULL, 0x100000ULL, DFmode);
- REAL_VALUE_FROM_INT (bias_hi_rvt, 0x10000000000000ULL, 0x000000ULL, DFmode);
- bias_lo_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_lo_rvt, DFmode);
- bias_hi_rtx = CONST_DOUBLE_FROM_REAL_VALUE (bias_hi_rvt, DFmode);
- biases_rtvec = gen_rtvec (2, bias_lo_rtx, bias_hi_rtx);
- biases = validize_mem (force_const_mem (V2DFmode,
- gen_rtx_CONST_VECTOR (V2DFmode,
- biases_rtvec)));
- emit_insn (gen_subv2df3 (final_result_xmm, final_result_xmm, biases));
-
- if (TARGET_SSE3)
- {
- emit_insn (gen_sse3_haddv2df3 (final_result_xmm, final_result_xmm,
- final_result_xmm));
- }
- else
- {
- result_lo_xmm = gen_reg_rtx (V2DFmode);
- ix86_expand_vector_move2 (V2DFmode, result_lo_xmm, final_result_xmm);
- emit_insn (gen_sse2_unpckhpd (final_result_xmm, final_result_xmm,
- final_result_xmm));
- emit_insn (gen_addv2df3 (final_result_xmm, final_result_xmm,
- result_lo_xmm));
- }
-
- if (!rtx_equal_p (target, final_result_xmm))
- emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
-
- return "";
-}
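-
-/* The above is the classic exponent-stuffing trick: 0x43300000 and
-   0x45300000 are the high words of the doubles 2**52 and 2**84, so
-   after the interleaves one lane holds exactly 2**52 + lo and the
-   other 2**84 + hi * 2**32.  Subtracting the matching bias constants
-   and summing the two lanes leaves (double) (hi * 2**32 + lo).  */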
-/* APPLE LOCAL end 4176531 4424891 */
-
-/* APPLE LOCAL begin 4424891 */
-/* Convert an unsigned SImode value into a DFmode, using only SSE.
- Result returned in an %xmm register. For x86_32, -mfpmath=sse,
- !optimize_size only. */
-const char *
-ix86_expand_convert_uns_SI2DF_sse (rtx operands[])
-{
- REAL_VALUE_TYPE rvt_int_two31;
- rtx int_value_reg;
- rtx fp_value_xmm, fp_value_as_int_xmm;
- rtx final_result_xmm;
- rtx int_two31_as_fp, int_two31_as_fp_vec;
- rtx v_rtx;
- rtx target = operands[0];
-
- gcc_assert (ix86_preferred_stack_boundary >= 128);
- gcc_assert (GET_MODE (operands[1]) == SImode);
-
- cfun->uses_vector = 1;
-
- int_value_reg = gen_reg_rtx (SImode);
- emit_move_insn (int_value_reg, operands[1]);
- emit_insn (gen_addsi3 (int_value_reg, int_value_reg,
- GEN_INT (-2147483648LL /* MIN_INT */)));
-
- fp_value_as_int_xmm = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_loadld (fp_value_as_int_xmm, CONST0_RTX (V4SImode),
- int_value_reg));
-
- fp_value_xmm = gen_reg_rtx (V2DFmode);
- emit_insn (gen_sse2_cvtdq2pd (fp_value_xmm,
- gen_rtx_SUBREG (V4SImode,
- fp_value_as_int_xmm, 0)));
-
- real_from_integer (&rvt_int_two31, DFmode, 0x80000000ULL, 0ULL, 1);
- int_two31_as_fp = const_double_from_real_value (rvt_int_two31, DFmode);
- v_rtx = gen_rtx_CONST_VECTOR (V2DFmode,
- gen_2_4_rtvec (2, int_two31_as_fp, DFmode));
-
- int_two31_as_fp_vec = validize_mem (force_const_mem (V2DFmode, v_rtx));
-
- final_result_xmm = gen_reg_rtx (V2DFmode);
- emit_move_insn (final_result_xmm, fp_value_xmm);
- emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm,
- int_two31_as_fp_vec));
-
- if (!rtx_equal_p (target, final_result_xmm))
- emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
-
- return "";
-}
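-
-/* Scalar sketch of the above: the addition of -2**31 flips the value
-   into signed range, so the result is in effect
-
-	(double) (int) (x - 0x80000000u) + 2147483648.0
-
-   using the signed cvtdq2pd conversion plus a final FP add.  */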
-
-/* Convert a signed DImode value into a DFmode, using only SSE.
- Result returned in an %xmm register. For x86_32, -mfpmath=sse,
- !optimize_size only. */
-const char *
-ix86_expand_convert_sign_DI2DF_sse (rtx operands[])
-{
- rtx my_operands[2];
- REAL_VALUE_TYPE rvt_int_two32;
- rtx rvt_int_two32_vec;
- rtx fp_value_hi_xmm, fp_value_hi_shifted_xmm;
- rtx final_result_xmm;
- rtx int_two32_as_fp, int_two32_as_fp_vec;
- rtx target = operands[0];
- rtx input = force_reg (DImode, operands[1]);
-
- gcc_assert (ix86_preferred_stack_boundary >= 128);
- gcc_assert (GET_MODE (input) == DImode);
-
- cfun->uses_vector = 1;
-
- fp_value_hi_xmm = gen_reg_rtx (V2DFmode);
- emit_insn (gen_sse2_cvtsi2sd (fp_value_hi_xmm, fp_value_hi_xmm,
- gen_rtx_SUBREG (SImode, input, 4)));
-
- real_from_integer (&rvt_int_two32, DFmode, 0x100000000ULL, 0ULL, 1);
- int_two32_as_fp = const_double_from_real_value (rvt_int_two32, DFmode);
- rvt_int_two32_vec = gen_rtx_CONST_VECTOR (V2DFmode,
- gen_2_4_rtvec (2, int_two32_as_fp, DFmode));
-
- int_two32_as_fp_vec = validize_mem (force_const_mem (V2DFmode,
- rvt_int_two32_vec));
-
- fp_value_hi_shifted_xmm = gen_reg_rtx (V2DFmode);
- emit_move_insn (fp_value_hi_shifted_xmm, fp_value_hi_xmm);
- emit_insn (gen_sse2_vmmulv2df3 (fp_value_hi_shifted_xmm,
- fp_value_hi_shifted_xmm,
- int_two32_as_fp_vec));
-
- my_operands[0] = gen_reg_rtx (DFmode);
- my_operands[1] = gen_rtx_SUBREG (SImode, input, 0);
- (void) ix86_expand_convert_uns_SI2DF_sse (my_operands);
-
- final_result_xmm = REG_P (target) && GET_MODE (target) == V2DFmode
- ? target : gen_reg_rtx (V2DFmode);
- emit_move_insn (final_result_xmm, gen_rtx_SUBREG (V2DFmode,
- my_operands[0], 0));
- emit_insn (gen_sse2_vmaddv2df3 (final_result_xmm, final_result_xmm,
- fp_value_hi_shifted_xmm));
-
- if (!rtx_equal_p (target, final_result_xmm))
- emit_move_insn (target, gen_rtx_SUBREG (DFmode, final_result_xmm, 0));
-
- return "";
-}
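-
-/* In other words, the result is built as
-
-	(double) (signed) high_word * 2**32 + (double) (unsigned) low_word
-
-   with cvtsi2sd for the high word, a multiply by 2**32, and the
-   unsigned SI->DF expansion above for the low word.  */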
-/* APPLE LOCAL end 4424891 */
-
-/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
- Create a mask for the sign bit in MODE for an SSE register. If VECT is
- true, then replicate the mask for all elements of the vector register.
- If INVERT is true, then create a mask excluding the sign bit. */
-
-rtx
-ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
-{
- enum machine_mode vec_mode;
- HOST_WIDE_INT hi, lo;
- int shift = 63;
- rtvec v;
- rtx mask;
-
- /* Find the sign bit, sign extended to 2*HWI. */
- if (mode == SFmode)
- lo = 0x80000000, hi = lo < 0;
- else if (HOST_BITS_PER_WIDE_INT >= 64)
- lo = (HOST_WIDE_INT)1 << shift, hi = -1;
- else
- lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
-
- if (invert)
- lo = ~lo, hi = ~hi;
-
- /* Force this value into the low part of a fp vector constant. */
- mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
- mask = gen_lowpart (mode, mask);
-
- if (mode == SFmode)
- {
- if (vect)
- v = gen_rtvec (4, mask, mask, mask, mask);
- else
- v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode));
- vec_mode = V4SFmode;
- }
- else
- {
- if (vect)
- v = gen_rtvec (2, mask, mask);
- else
- v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
- vec_mode = V2DFmode;
- }
-
- return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
-}
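-
-/* E.g. for DFmode the mask element is 0x8000000000000000 (or its
-   complement when INVERT is true), giving the constants consumed by
-   the and/xor patterns in the absneg and copysign expanders below.  */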
-
-/* Generate code for floating point ABS or NEG. */
-
-void
-ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
- rtx operands[])
-{
- rtx mask, set, use, clob, dst, src;
- bool matching_memory;
- bool use_sse = false;
- bool vector_mode = VECTOR_MODE_P (mode);
- enum machine_mode elt_mode = mode;
-
- if (vector_mode)
- {
- elt_mode = GET_MODE_INNER (mode);
- use_sse = true;
- }
- else if (TARGET_SSE_MATH)
- use_sse = SSE_FLOAT_MODE_P (mode);
-
- /* NEG and ABS performed with SSE use bitwise mask operations.
- Create the appropriate mask now. */
- if (use_sse)
- mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
- else
- mask = NULL_RTX;
-
- dst = operands[0];
- src = operands[1];
-
- /* If the destination is memory, and we don't have matching source
- operands or we're using the x87, do things in registers. */
- matching_memory = false;
- if (MEM_P (dst))
- {
- if (use_sse && rtx_equal_p (dst, src))
- matching_memory = true;
- else
- dst = gen_reg_rtx (mode);
- }
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
-
- if (vector_mode)
- {
- set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
- set = gen_rtx_SET (VOIDmode, dst, set);
- emit_insn (set);
- }
- else
- {
- set = gen_rtx_fmt_e (code, mode, src);
- set = gen_rtx_SET (VOIDmode, dst, set);
- if (mask)
- {
- use = gen_rtx_USE (VOIDmode, mask);
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (3, set, use, clob)));
- }
- else
- emit_insn (set);
- }
-
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
-}
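-
-/* With SSE this reduces to a bitwise operation: NEG is an xorps/xorpd
-   with the sign-bit mask and ABS an andps/andpd with the inverted
-   mask, e.g. roughly
-
-	xorpd	sign_mask, %xmm0
-
-   for negdf2 (a sketch; the scalar pattern carries the mask as a USE
-   and clobbers the flags, as emitted above).  */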
-
-/* Expand a copysign operation. Special case operand 0 being a constant. */
-
-void
-ix86_expand_copysign (rtx operands[])
-{
- enum machine_mode mode, vmode;
- rtx dest, op0, op1, mask, nmask;
-
- dest = operands[0];
- op0 = operands[1];
- op1 = operands[2];
-
- mode = GET_MODE (dest);
- vmode = mode == SFmode ? V4SFmode : V2DFmode;
-
- if (GET_CODE (op0) == CONST_DOUBLE)
- {
- rtvec v;
-
- if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
- op0 = simplify_unary_operation (ABS, mode, op0, mode);
-
- if (op0 == CONST0_RTX (mode))
- op0 = CONST0_RTX (vmode);
- else
- {
- if (mode == SFmode)
- v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode));
- else
- v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
- op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
- }
-
- mask = ix86_build_signbit_mask (mode, 0, 0);
-
- if (mode == SFmode)
- emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
- else
- emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
- }
- else
- {
- nmask = ix86_build_signbit_mask (mode, 0, 1);
- mask = ix86_build_signbit_mask (mode, 0, 0);
-
- if (mode == SFmode)
- emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
- else
- emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
- }
-}
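-
-/* The underlying identity is
-
-	copysign (x, y) = (x & ~signmask) | (y & signmask)
-
-   and the two split routines below materialize exactly these AND/IOR
-   pairs once the masks have been generated.  */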
-
-/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
- be a constant, and so has already been expanded into a vector constant. */
-
-void
-ix86_split_copysign_const (rtx operands[])
-{
- enum machine_mode mode, vmode;
- rtx dest, op0, op1, mask, x;
-
- dest = operands[0];
- op0 = operands[1];
- op1 = operands[2];
- mask = operands[3];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- dest = simplify_gen_subreg (vmode, dest, mode, 0);
- x = gen_rtx_AND (vmode, dest, mask);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
- if (op0 != CONST0_RTX (vmode))
- {
- x = gen_rtx_IOR (vmode, dest, op0);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
-}
-
-/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
- so we have to do two masks. */
-
-void
-ix86_split_copysign_var (rtx operands[])
-{
- enum machine_mode mode, vmode;
- rtx dest, scratch, op0, op1, mask, nmask, x;
-
- dest = operands[0];
- scratch = operands[1];
- op0 = operands[2];
- op1 = operands[3];
- nmask = operands[4];
- mask = operands[5];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- if (rtx_equal_p (op0, op1))
- {
- /* Shouldn't happen often (it's useless, obviously), but when it does
- we'd generate incorrect code if we continue below. */
- emit_move_insn (dest, op0);
- return;
- }
-
- if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
- {
- gcc_assert (REGNO (op1) == REGNO (scratch));
-
- x = gen_rtx_AND (vmode, scratch, mask);
- emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
-
- dest = mask;
- op0 = simplify_gen_subreg (vmode, op0, mode, 0);
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op0);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else
- {
- if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
- {
- x = gen_rtx_AND (vmode, scratch, mask);
- }
- else /* alternative 2,4 */
- {
- gcc_assert (REGNO (mask) == REGNO (scratch));
- op1 = simplify_gen_subreg (vmode, op1, mode, 0);
- x = gen_rtx_AND (vmode, scratch, op1);
- }
- emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
-
- if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
- {
- dest = simplify_gen_subreg (vmode, op0, mode, 0);
- x = gen_rtx_AND (vmode, dest, nmask);
- }
- else /* alternative 3,4 */
- {
- gcc_assert (REGNO (nmask) == REGNO (dest));
- dest = nmask;
- op0 = simplify_gen_subreg (vmode, op0, mode, 0);
- x = gen_rtx_AND (vmode, dest, op0);
- }
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
-
- x = gen_rtx_IOR (vmode, dest, scratch);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-}
-
-/* Return TRUE or FALSE depending on whether the first SET in INSN
-   has source and destination with matching CC modes and whether the
-   CC mode is at least as constrained as REQ_MODE.  */
-
-int
-ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
-{
- rtx set;
- enum machine_mode set_mode;
-
- set = PATTERN (insn);
- if (GET_CODE (set) == PARALLEL)
- set = XVECEXP (set, 0, 0);
- gcc_assert (GET_CODE (set) == SET);
- gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
-
- set_mode = GET_MODE (SET_DEST (set));
- switch (set_mode)
- {
- case CCNOmode:
- if (req_mode != CCNOmode
- && (req_mode != CCmode
- || XEXP (SET_SRC (set), 1) != const0_rtx))
- return 0;
- break;
- case CCmode:
- if (req_mode == CCGCmode)
- return 0;
- /* FALLTHRU */
- case CCGCmode:
- if (req_mode == CCGOCmode || req_mode == CCNOmode)
- return 0;
- /* FALLTHRU */
- case CCGOCmode:
- if (req_mode == CCZmode)
- return 0;
- /* FALLTHRU */
- case CCZmode:
- break;
-
- default:
- gcc_unreachable ();
- }
-
- return (GET_MODE (SET_SRC (set)) == set_mode);
-}
-
-/* Generate insn patterns to do an integer compare of OPERANDS. */
-
-static rtx
-ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
-{
- enum machine_mode cmpmode;
- rtx tmp, flags;
-
- cmpmode = SELECT_CC_MODE (code, op0, op1);
- flags = gen_rtx_REG (cmpmode, FLAGS_REG);
-
- /* This is very simple, but making the interface the same as in the
- FP case makes the rest of the code easier. */
- tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
- emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
-
- /* Return the test that should be put into the flags user, i.e.
- the bcc, scc, or cmov instruction. */
- return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
-}
-
-/* Figure out whether to use ordered or unordered fp comparisons.
- Return the appropriate mode to use. */
-
-enum machine_mode
-ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
-{
- /* ??? In order to make all comparisons reversible, we do all comparisons
- non-trapping when compiling for IEEE. Once gcc is able to distinguish
-     all forms of trapping and nontrapping comparisons, we can make inequality
- comparisons trapping again, since it results in better code when using
- FCOM based compares. */
- return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
-}
-
-enum machine_mode
-ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
-{
- if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
- return ix86_fp_compare_mode (code);
- switch (code)
- {
- /* Only zero flag is needed. */
- case EQ: /* ZF=0 */
- case NE: /* ZF!=0 */
- return CCZmode;
- /* Codes needing carry flag. */
- case GEU: /* CF=0 */
- case GTU: /* CF=0 & ZF=0 */
- case LTU: /* CF=1 */
- case LEU: /* CF=1 | ZF=1 */
- return CCmode;
-    /* Codes that may be doable with only the sign flag when
-       comparing against zero.  */
- case GE: /* SF=OF or SF=0 */
- case LT: /* SF<>OF or SF=1 */
- if (op1 == const0_rtx)
- return CCGOCmode;
- else
-	/* For the other cases the carry flag is not required.  */
- return CCGCmode;
-    /* Codes doable only with the sign flag when comparing
-       against zero, but for which we lack a jump instruction,
-       so we need to use relational tests against the overflow
-       flag, which thus needs to be zero.  */
- case GT: /* ZF=0 & SF=OF */
- case LE: /* ZF=1 | SF<>OF */
- if (op1 == const0_rtx)
- return CCNOmode;
- else
- return CCGCmode;
-    /* The strcmp pattern does (use flags), and combine may ask us
-       for the proper mode.  */
- case USE:
- return CCmode;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return the fixed registers used for condition codes. */
-
-static bool
-ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
-{
- *p1 = FLAGS_REG;
- *p2 = FPSR_REG;
- return true;
-}
-
-/* If two condition code modes are compatible, return a condition code
- mode which is compatible with both. Otherwise, return
- VOIDmode. */
-
-static enum machine_mode
-ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
-{
- if (m1 == m2)
- return m1;
-
- if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
- return VOIDmode;
-
- if ((m1 == CCGCmode && m2 == CCGOCmode)
- || (m1 == CCGOCmode && m2 == CCGCmode))
- return CCGCmode;
-
- switch (m1)
- {
- default:
- gcc_unreachable ();
-
- case CCmode:
- case CCGCmode:
- case CCGOCmode:
- case CCNOmode:
- case CCZmode:
- switch (m2)
- {
- default:
- return VOIDmode;
-
- case CCmode:
- case CCGCmode:
- case CCGOCmode:
- case CCNOmode:
- case CCZmode:
- return CCmode;
- }
-
- case CCFPmode:
- case CCFPUmode:
- /* These are only compatible with themselves, which we already
- checked above. */
- return VOIDmode;
- }
-}
-
-/* Return true if we should use an FCOMI instruction for this fp comparison. */
-
-int
-ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
-{
- enum rtx_code swapped_code = swap_condition (code);
- return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
- || (ix86_fp_comparison_cost (swapped_code)
- == ix86_fp_comparison_fcomi_cost (swapped_code)));
-}
-
-/* Swap, force into registers, or otherwise massage the two operands
-   to an FP comparison.  The operands are updated in place; the new
- comparison code is returned. */
-
-static enum rtx_code
-ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
-{
- enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
- rtx op0 = *pop0, op1 = *pop1;
- enum machine_mode op_mode = GET_MODE (op0);
- int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
-
- /* All of the unordered compare instructions only work on registers.
- The same is true of the fcomi compare instructions. The XFmode
- compare instructions require registers except when comparing
- against zero or when converting operand 1 from fixed point to
- floating point. */
-
- if (!is_sse
- && (fpcmp_mode == CCFPUmode
- || (op_mode == XFmode
- && ! (standard_80387_constant_p (op0) == 1
- || standard_80387_constant_p (op1) == 1)
- && GET_CODE (op1) != FLOAT)
- || ix86_use_fcomi_compare (code)))
- {
- op0 = force_reg (op_mode, op0);
- op1 = force_reg (op_mode, op1);
- }
- else
- {
- /* %%% We only allow op1 in memory; op0 must be st(0). So swap
- things around if they appear profitable, otherwise force op0
- into a register. */
-
- if (standard_80387_constant_p (op0) == 0
- || (GET_CODE (op0) == MEM
- && ! (standard_80387_constant_p (op1) == 0
- || GET_CODE (op1) == MEM)))
- {
- rtx tmp;
- tmp = op0, op0 = op1, op1 = tmp;
- code = swap_condition (code);
- }
-
- if (GET_CODE (op0) != REG)
- op0 = force_reg (op_mode, op0);
-
- if (CONSTANT_P (op1))
- {
- int tmp = standard_80387_constant_p (op1);
- if (tmp == 0)
- op1 = validize_mem (force_const_mem (op_mode, op1));
- else if (tmp == 1)
- {
- if (TARGET_CMOVE)
- op1 = force_reg (op_mode, op1);
- }
- else
- op1 = force_reg (op_mode, op1);
- }
- }
-
- /* Try to rearrange the comparison to make it cheaper. */
- if (ix86_fp_comparison_cost (code)
- > ix86_fp_comparison_cost (swap_condition (code))
- && (GET_CODE (op1) == REG || !no_new_pseudos))
- {
- rtx tmp;
- tmp = op0, op0 = op1, op1 = tmp;
- code = swap_condition (code);
- if (GET_CODE (op0) != REG)
- op0 = force_reg (op_mode, op0);
- }
-
- *pop0 = op0;
- *pop1 = op1;
- return code;
-}
-
-/* Convert the comparison codes we use to represent FP comparisons to the
-   integer codes that will result in a proper branch.  Return UNKNOWN if
-   no such code is available.  */
-
-enum rtx_code
-ix86_fp_compare_code_to_integer (enum rtx_code code)
-{
-  switch (code)
-    {
-    case GT:
-      return GTU;
-    case GE:
-      return GEU;
-    case ORDERED:
-    case UNORDERED:
-      return code;
-    case UNEQ:
-      return EQ;
-    case UNLT:
-      return LTU;
-    case UNLE:
-      return LEU;
-    case LTGT:
-      return NE;
-    default:
-      return UNKNOWN;
-    }
-}
-
-/* Split comparison code CODE into comparisons we can do using branch
-   instructions.  BYPASS_CODE is the comparison code for the branch that
-   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
-   is not required, its value is set to UNKNOWN.
-   We never require more than two branches.  */
-
-void
-ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
- enum rtx_code *first_code,
- enum rtx_code *second_code)
-{
- *first_code = code;
- *bypass_code = UNKNOWN;
- *second_code = UNKNOWN;
-
- /* The fcomi comparison sets flags as follows:
-
- cmp ZF PF CF
- > 0 0 0
- < 0 0 1
- = 1 0 0
- un 1 1 1 */
-
- switch (code)
- {
- case GT: /* GTU - CF=0 & ZF=0 */
- case GE: /* GEU - CF=0 */
- case ORDERED: /* PF=0 */
- case UNORDERED: /* PF=1 */
- case UNEQ: /* EQ - ZF=1 */
- case UNLT: /* LTU - CF=1 */
- case UNLE: /* LEU - CF=1 | ZF=1 */
- case LTGT: /* EQ - ZF=0 */
- break;
- case LT: /* LTU - CF=1 - fails on unordered */
- *first_code = UNLT;
- *bypass_code = UNORDERED;
- break;
- case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
- *first_code = UNLE;
- *bypass_code = UNORDERED;
- break;
- case EQ: /* EQ - ZF=1 - fails on unordered */
- *first_code = UNEQ;
- *bypass_code = UNORDERED;
- break;
- case NE: /* NE - ZF=0 - fails on unordered */
- *first_code = LTGT;
- *second_code = UNORDERED;
- break;
- case UNGE: /* GEU - CF=0 - fails on unordered */
- *first_code = GE;
- *second_code = UNORDERED;
- break;
- case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
- *first_code = GT;
- *second_code = UNORDERED;
- break;
- default:
- gcc_unreachable ();
- }
- if (!TARGET_IEEE_FP)
- {
- *second_code = UNKNOWN;
- *bypass_code = UNKNOWN;
- }
-}
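-
-/* For example, with TARGET_IEEE_FP a floating point "a < b" becomes
-   FIRST_CODE = UNLT with BYPASS_CODE = UNORDERED: branch around the
-   UNLT test when the operands compare unordered, since LT must fail
-   on NaNs while the CF=1 test alone would succeed on them.  */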
-
-/* Return the cost of a comparison done with fcom + arithmetic operations
-   on AX.  All of the following functions use the number of instructions
-   as a cost metric.  In the future this should be tweaked to compute
-   bytes for optimize_size and take into account the performance of
-   various instructions on various CPUs.  */
-static int
-ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
-{
- if (!TARGET_IEEE_FP)
- return 4;
- /* The cost of code output by ix86_expand_fp_compare. */
-  switch (code)
-    {
-    case UNLE:
-    case UNLT:
-    case LTGT:
-    case GT:
-    case GE:
-    case UNORDERED:
-    case ORDERED:
-    case UNEQ:
-      return 4;
-    case LT:
-    case NE:
-    case EQ:
-    case UNGE:
-      return 5;
-    case LE:
-    case UNGT:
-      return 6;
-    default:
-      gcc_unreachable ();
-    }
-}
-
-/* Return cost of comparison done using fcomi operation.
- See ix86_fp_comparison_arithmetics_cost for the metrics. */
-static int
-ix86_fp_comparison_fcomi_cost (enum rtx_code code)
-{
- enum rtx_code bypass_code, first_code, second_code;
-  /* Return an arbitrarily high cost when the instruction is not
-     supported; this prevents gcc from using it.  */
- if (!TARGET_CMOVE)
- return 1024;
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
- return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
-}
-
-/* Return cost of comparison done using sahf operation.
- See ix86_fp_comparison_arithmetics_cost for the metrics. */
-static int
-ix86_fp_comparison_sahf_cost (enum rtx_code code)
-{
- enum rtx_code bypass_code, first_code, second_code;
-  /* Return an arbitrarily high cost when the instruction is not
-     preferred; this keeps gcc from using it.  */
- if (!TARGET_USE_SAHF && !optimize_size)
- return 1024;
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
- return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
-}
-
-/* Compute cost of the comparison done using any method.
- See ix86_fp_comparison_arithmetics_cost for the metrics. */
-static int
-ix86_fp_comparison_cost (enum rtx_code code)
-{
- int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
- int min;
-
- fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
- sahf_cost = ix86_fp_comparison_sahf_cost (code);
-
- min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
- if (min > sahf_cost)
- min = sahf_cost;
- if (min > fcomi_cost)
- min = fcomi_cost;
- return min;
-}
-
-/* Generate insn patterns to do a floating point compare of OPERANDS. */
-
-static rtx
-ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
- rtx *second_test, rtx *bypass_test)
-{
- enum machine_mode fpcmp_mode, intcmp_mode;
- rtx tmp, tmp2;
- int cost = ix86_fp_comparison_cost (code);
- enum rtx_code bypass_code, first_code, second_code;
-
- fpcmp_mode = ix86_fp_compare_mode (code);
- code = ix86_prepare_fp_compare_args (code, &op0, &op1);
-
- if (second_test)
- *second_test = NULL_RTX;
- if (bypass_test)
- *bypass_test = NULL_RTX;
-
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
-
- /* Do fcomi/sahf based test when profitable. */
- if ((bypass_code == UNKNOWN || bypass_test)
- && (second_code == UNKNOWN || second_test)
- && ix86_fp_comparison_arithmetics_cost (code) > cost)
- {
- if (TARGET_CMOVE)
- {
- tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
- tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
- tmp);
- emit_insn (tmp);
- }
- else
- {
- tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
- tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
- if (!scratch)
- scratch = gen_reg_rtx (HImode);
- emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
- emit_insn (gen_x86_sahf_1 (scratch));
- }
-
- /* The FP codes work out to act like unsigned. */
- intcmp_mode = fpcmp_mode;
- code = first_code;
- if (bypass_code != UNKNOWN)
- *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
- gen_rtx_REG (intcmp_mode, FLAGS_REG),
- const0_rtx);
- if (second_code != UNKNOWN)
- *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
- gen_rtx_REG (intcmp_mode, FLAGS_REG),
- const0_rtx);
- }
- else
- {
- /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
- tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
- tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
- if (!scratch)
- scratch = gen_reg_rtx (HImode);
- emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
-
- /* In the unordered case, we have to check C2 for NaN's, which
- doesn't happen to work out to anything nice combination-wise.
- So do some bit twiddling on the value we've got in AH to come
- up with an appropriate set of condition codes. */
-
- intcmp_mode = CCNOmode;
- switch (code)
- {
- case GT:
- case UNGT:
- if (code == GT || !TARGET_IEEE_FP)
- {
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
- code = EQ;
- }
- else
- {
- emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
- intcmp_mode = CCmode;
- code = GEU;
- }
- break;
- case LT:
- case UNLT:
- if (code == LT && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
- intcmp_mode = CCmode;
- code = EQ;
- }
- else
- {
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
- code = NE;
- }
- break;
- case GE:
- case UNGE:
- if (code == GE || !TARGET_IEEE_FP)
- {
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
- code = EQ;
- }
- else
- {
- emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
- GEN_INT (0x01)));
- code = NE;
- }
- break;
- case LE:
- case UNLE:
- if (code == LE && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
- intcmp_mode = CCmode;
- code = LTU;
- }
- else
- {
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
- code = NE;
- }
- break;
- case EQ:
- case UNEQ:
- if (code == EQ && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
- intcmp_mode = CCmode;
- code = EQ;
- }
-	  else
-	    {
-	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
-	      code = NE;
-	    }
-	  break;
- case NE:
- case LTGT:
- if (code == NE && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
- GEN_INT (0x40)));
- code = NE;
- }
- else
- {
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
- code = EQ;
- }
- break;
-
- case UNORDERED:
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
- code = NE;
- break;
- case ORDERED:
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
- code = EQ;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
-
- /* Return the test that should be put into the flags user, i.e.
- the bcc, scc, or cmov instruction. */
- return gen_rtx_fmt_ee (code, VOIDmode,
- gen_rtx_REG (intcmp_mode, FLAGS_REG),
- const0_rtx);
-}
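-
-/* The magic constants in the fnstsw/sahf path above are x87 status
-   word condition bits as they appear in AH: 0x01 is C0 (the
-   carry-like bit), 0x04 is C2 (set on unordered), 0x40 is C3 (the
-   zero-like bit), and 0x45 is their union C0|C2|C3.  */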
-
-rtx
-ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
-{
- rtx op0, op1, ret;
- op0 = ix86_compare_op0;
- op1 = ix86_compare_op1;
-
- if (second_test)
- *second_test = NULL_RTX;
- if (bypass_test)
- *bypass_test = NULL_RTX;
-
- if (ix86_compare_emitted)
- {
- ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
- ix86_compare_emitted = NULL_RTX;
- }
- else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
- ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
- second_test, bypass_test);
- else
- ret = ix86_expand_int_compare (code, op0, op1);
-
- return ret;
-}
-
-/* Return true if the CODE will result in a nontrivial jump sequence.  */
-bool
-ix86_fp_jump_nontrivial_p (enum rtx_code code)
-{
- enum rtx_code bypass_code, first_code, second_code;
- if (!TARGET_CMOVE)
- return true;
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
- return bypass_code != UNKNOWN || second_code != UNKNOWN;
-}
-
-void
-ix86_expand_branch (enum rtx_code code, rtx label)
-{
- rtx tmp;
-
- /* If we have emitted a compare insn, go straight to simple.
- ix86_expand_compare won't emit anything if ix86_compare_emitted
-     is non-NULL.  */
- if (ix86_compare_emitted)
- goto simple;
-
- switch (GET_MODE (ix86_compare_op0))
- {
- case QImode:
- case HImode:
- case SImode:
- simple:
- tmp = ix86_expand_compare (code, NULL, NULL);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
- return;
-
- case SFmode:
- case DFmode:
- case XFmode:
- {
- rtvec vec;
- int use_fcomi;
- enum rtx_code bypass_code, first_code, second_code;
-
- code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
- &ix86_compare_op1);
-
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
-
-	/* Check whether we will use the natural sequence with one jump.  If
-	   so, we can expand the jump early.  Otherwise delay expansion by
-	   creating a compound insn so as not to confuse the optimizers.  */
- if (bypass_code == UNKNOWN && second_code == UNKNOWN
- && TARGET_CMOVE)
- {
- ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx, NULL_RTX, NULL_RTX);
- }
- else
- {
- tmp = gen_rtx_fmt_ee (code, VOIDmode,
- ix86_compare_op0, ix86_compare_op1);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx);
- tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
-
- use_fcomi = ix86_use_fcomi_compare (code);
- vec = rtvec_alloc (3 + !use_fcomi);
- RTVEC_ELT (vec, 0) = tmp;
- RTVEC_ELT (vec, 1)
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
- RTVEC_ELT (vec, 2)
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
- if (! use_fcomi)
- RTVEC_ELT (vec, 3)
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
-
- emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
- }
- return;
- }
-
- case DImode:
- if (TARGET_64BIT)
- goto simple;
- case TImode:
- /* Expand DImode branch into multiple compare+branch. */
- {
- rtx lo[2], hi[2], label2;
- enum rtx_code code1, code2, code3;
- enum machine_mode submode;
-
- if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
- {
- tmp = ix86_compare_op0;
- ix86_compare_op0 = ix86_compare_op1;
- ix86_compare_op1 = tmp;
- code = swap_condition (code);
- }
- if (GET_MODE (ix86_compare_op0) == DImode)
- {
- split_di (&ix86_compare_op0, 1, lo+0, hi+0);
- split_di (&ix86_compare_op1, 1, lo+1, hi+1);
- submode = SImode;
- }
- else
- {
- split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
- split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
- submode = DImode;
- }
-
- /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
- avoid two branches. This costs one extra insn, so disable when
- optimizing for size. */
-
- if ((code == EQ || code == NE)
- && (!optimize_size
- || hi[1] == const0_rtx || lo[1] == const0_rtx))
- {
- rtx xor0, xor1;
-
- xor1 = hi[0];
- if (hi[1] != const0_rtx)
- xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
- NULL_RTX, 0, OPTAB_WIDEN);
-
- xor0 = lo[0];
- if (lo[1] != const0_rtx)
- xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
- NULL_RTX, 0, OPTAB_WIDEN);
-
- tmp = expand_binop (submode, ior_optab, xor1, xor0,
- NULL_RTX, 0, OPTAB_WIDEN);
-
- ix86_compare_op0 = tmp;
- ix86_compare_op1 = const0_rtx;
- ix86_expand_branch (code, label);
- return;
- }
-
-	/* Otherwise, if we are doing a less-than or greater-or-equal
-	   comparison, op1 is a constant, and the low word is zero, then
-	   we can just examine the high word.  */
-
- if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
- switch (code)
- {
- case LT: case LTU: case GE: case GEU:
- ix86_compare_op0 = hi[0];
- ix86_compare_op1 = hi[1];
- ix86_expand_branch (code, label);
- return;
- default:
- break;
- }
-
- /* Otherwise, we need two or three jumps. */
-
- label2 = gen_label_rtx ();
-
- code1 = code;
- code2 = swap_condition (code);
- code3 = unsigned_condition (code);
-
- switch (code)
- {
- case LT: case GT: case LTU: case GTU:
- break;
-
- case LE: code1 = LT; code2 = GT; break;
- case GE: code1 = GT; code2 = LT; break;
- case LEU: code1 = LTU; code2 = GTU; break;
- case GEU: code1 = GTU; code2 = LTU; break;
-
- case EQ: code1 = UNKNOWN; code2 = NE; break;
- case NE: code2 = UNKNOWN; break;
-
- default:
- gcc_unreachable ();
- }
-
- /*
- * a < b =>
- * if (hi(a) < hi(b)) goto true;
- * if (hi(a) > hi(b)) goto false;
- * if (lo(a) < lo(b)) goto true;
- * false:
- */
-
- ix86_compare_op0 = hi[0];
- ix86_compare_op1 = hi[1];
-
- if (code1 != UNKNOWN)
- ix86_expand_branch (code1, label);
- if (code2 != UNKNOWN)
- ix86_expand_branch (code2, label2);
-
- ix86_compare_op0 = lo[0];
- ix86_compare_op1 = lo[1];
- ix86_expand_branch (code3, label);
-
- if (code2 != UNKNOWN)
- emit_label (label2);
- return;
- }
-
- default:
- gcc_unreachable ();
- }
-}
-
-/* Split branch based on floating point condition. */
-void
-ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
- rtx target1, rtx target2, rtx tmp, rtx pushed)
-{
- rtx second, bypass;
- rtx label = NULL_RTX;
- rtx condition;
- int bypass_probability = -1, second_probability = -1, probability = -1;
- rtx i;
-
- if (target2 != pc_rtx)
- {
- rtx tmp = target2;
- code = reverse_condition_maybe_unordered (code);
- target2 = target1;
- target1 = tmp;
- }
-
- condition = ix86_expand_fp_compare (code, op1, op2,
- tmp, &second, &bypass);
-
- /* Remove pushed operand from stack. */
- if (pushed)
- ix86_free_from_memory (GET_MODE (pushed));
-
- if (split_branch_probability >= 0)
- {
- /* Distribute the probabilities across the jumps.
-	 Assume that BYPASS and SECOND always test
-	 for UNORDERED.  */
- probability = split_branch_probability;
-
-      /* A value of 1 is low enough that the probability need not
-	 be updated.  Later we may run some experiments and see
- if unordered values are more frequent in practice. */
- if (bypass)
- bypass_probability = 1;
- if (second)
- second_probability = 1;
- }
- if (bypass != NULL_RTX)
- {
- label = gen_label_rtx ();
- i = emit_jump_insn (gen_rtx_SET
- (VOIDmode, pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode,
- bypass,
- gen_rtx_LABEL_REF (VOIDmode,
- label),
- pc_rtx)));
- if (bypass_probability >= 0)
- REG_NOTES (i)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (bypass_probability),
- REG_NOTES (i));
- }
- i = emit_jump_insn (gen_rtx_SET
- (VOIDmode, pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode,
- condition, target1, target2)));
- if (probability >= 0)
- REG_NOTES (i)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (probability),
- REG_NOTES (i));
- if (second != NULL_RTX)
- {
- i = emit_jump_insn (gen_rtx_SET
- (VOIDmode, pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
- target2)));
- if (second_probability >= 0)
- REG_NOTES (i)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (second_probability),
- REG_NOTES (i));
- }
- if (label != NULL_RTX)
- emit_label (label);
-}
-
-int
-ix86_expand_setcc (enum rtx_code code, rtx dest)
-{
- rtx ret, tmp, tmpreg, equiv;
- rtx second_test, bypass_test;
-
- if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
- return 0; /* FAIL */
-
- gcc_assert (GET_MODE (dest) == QImode);
-
- ret = ix86_expand_compare (code, &second_test, &bypass_test);
- PUT_MODE (ret, QImode);
-
- tmp = dest;
- tmpreg = dest;
-
- emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
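-  /* If the comparison required a second or a bypass test, compute that
-     test into a fresh QImode register and combine it with the first
-     result: AND for the (reversed) bypass test, IOR for the second
-     test.  */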
- if (bypass_test || second_test)
- {
- rtx test = second_test;
- int bypass = 0;
- rtx tmp2 = gen_reg_rtx (QImode);
- if (bypass_test)
- {
- gcc_assert (!second_test);
- test = bypass_test;
- bypass = 1;
- PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
- }
- PUT_MODE (test, QImode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
-
- if (bypass)
- emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
- else
- emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
- }
-
- /* Attach a REG_EQUAL note describing the comparison result. */
- if (ix86_compare_op0 && ix86_compare_op1)
- {
- equiv = simplify_gen_relational (code, QImode,
- GET_MODE (ix86_compare_op0),
- ix86_compare_op0, ix86_compare_op1);
- set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
- }
-
- return 1; /* DONE */
-}
-
-/* Expand comparison setting or clearing carry flag. Return true when
- successful and set pop for the operation. */
-static bool
-ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
-{
- enum machine_mode mode =
- GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
-
-  /* Do not handle DImode compares that go through the special path.  FP
-     compares are handled below when they reduce to a carry flag test.  */
- if (mode == (TARGET_64BIT ? TImode : DImode))
- return false;
- if (FLOAT_MODE_P (mode))
- {
- rtx second_test = NULL, bypass_test = NULL;
- rtx compare_op, compare_seq;
-
-      /* Shortcut:  the following common codes never translate into
-	 carry flag compares.  */
- if (code == EQ || code == NE || code == UNEQ || code == LTGT
- || code == ORDERED || code == UNORDERED)
- return false;
-
-      /* These comparisons require the zero flag; swap the operands so
-	 they no longer do.  */
- if ((code == GT || code == UNLE || code == LE || code == UNGT)
- && !TARGET_IEEE_FP)
- {
- rtx tmp = op0;
- op0 = op1;
- op1 = tmp;
- code = swap_condition (code);
- }
-
-      /* Try to expand the comparison and verify that we end up with a carry
-	 flag based comparison.  This fails only when we decide to expand the
-	 comparison using arithmetic, which is not a common scenario.  */
- start_sequence ();
- compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
- &second_test, &bypass_test);
- compare_seq = get_insns ();
- end_sequence ();
-
- if (second_test || bypass_test)
- return false;
- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
- || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
- code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
- else
- code = GET_CODE (compare_op);
- if (code != LTU && code != GEU)
- return false;
- emit_insn (compare_seq);
- *pop = compare_op;
- return true;
- }
- if (!INTEGRAL_MODE_P (mode))
- return false;
- switch (code)
- {
- case LTU:
- case GEU:
- break;
-
- /* Convert a==0 into (unsigned)a<1. */
- case EQ:
- case NE:
- if (op1 != const0_rtx)
- return false;
- op1 = const1_rtx;
- code = (code == EQ ? LTU : GEU);
- break;
-
-    /* Convert a>b into b<a or a>=b+1.  */
- case GTU:
- case LEU:
- if (GET_CODE (op1) == CONST_INT)
- {
- op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
- /* Bail out on overflow. We still can swap operands but that
- would force loading of the constant into register. */
- if (op1 == const0_rtx
- || !x86_64_immediate_operand (op1, GET_MODE (op1)))
- return false;
- code = (code == GTU ? GEU : LTU);
- }
- else
- {
- rtx tmp = op1;
- op1 = op0;
- op0 = tmp;
- code = (code == GTU ? LTU : GEU);
- }
- break;
-
- /* Convert a>=0 into (unsigned)a<0x80000000. */
- case LT:
- case GE:
- if (mode == DImode || op1 != const0_rtx)
- return false;
- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
- code = (code == LT ? GEU : LTU);
- break;
- case LE:
- case GT:
- if (mode == DImode || op1 != constm1_rtx)
- return false;
- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
- code = (code == LE ? GEU : LTU);
- break;
-
- default:
- return false;
- }
-  /* Swapping operands may cause a constant to appear as the first operand.  */
- if (!nonimmediate_operand (op0, VOIDmode))
- {
- if (no_new_pseudos)
- return false;
- op0 = force_reg (mode, op0);
- }
- ix86_compare_op0 = op0;
- ix86_compare_op1 = op1;
- *pop = ix86_expand_compare (code, NULL, NULL);
- gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
- return true;
-}
-
-int
-ix86_expand_int_movcc (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[1]), compare_code;
- rtx compare_seq, compare_op;
- rtx second_test, bypass_test;
- enum machine_mode mode = GET_MODE (operands[0]);
-  bool sign_bit_compare_p = false;
-
- start_sequence ();
- compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
- compare_seq = get_insns ();
- end_sequence ();
-
- compare_code = GET_CODE (compare_op);
-
- if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
- || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
- sign_bit_compare_p = true;
-
- /* Don't attempt mode expansion here -- if we had to expand 5 or 6
- HImode insns, we'd be swallowed in word prefix ops. */
-
- if ((mode != HImode || TARGET_FAST_PREFIX)
- && (mode != (TARGET_64BIT ? TImode : DImode))
- && GET_CODE (operands[2]) == CONST_INT
- && GET_CODE (operands[3]) == CONST_INT)
- {
- rtx out = operands[0];
- HOST_WIDE_INT ct = INTVAL (operands[2]);
- HOST_WIDE_INT cf = INTVAL (operands[3]);
- HOST_WIDE_INT diff;
-
- diff = ct - cf;
-      /* Sign bit compares are better done using shifts than using
-	 sbb.  */
- if (sign_bit_compare_p
- || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
- ix86_compare_op1, &compare_op))
- {
- /* Detect overlap between destination and compare sources. */
- rtx tmp = out;
-
- if (!sign_bit_compare_p)
- {
- bool fpcmp = false;
-
- compare_code = GET_CODE (compare_op);
-
- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
- || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
- {
- fpcmp = true;
- compare_code = ix86_fp_compare_code_to_integer (compare_code);
- }
-
-	      /* To simplify the rest of the code, restrict to the GEU case.  */
- if (compare_code == LTU)
- {
- HOST_WIDE_INT tmp = ct;
- ct = cf;
- cf = tmp;
- compare_code = reverse_condition (compare_code);
- code = reverse_condition (code);
- }
- else
- {
- if (fpcmp)
- PUT_CODE (compare_op,
- reverse_condition_maybe_unordered
- (GET_CODE (compare_op)));
- else
- PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
- }
- diff = ct - cf;
-
- if (reg_overlap_mentioned_p (out, ix86_compare_op0)
- || reg_overlap_mentioned_p (out, ix86_compare_op1))
- tmp = gen_reg_rtx (mode);
-
- if (mode == DImode)
- emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
- else
- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
- }
- else
- {
- if (code == GT || code == GE)
- code = reverse_condition (code);
- else
- {
- HOST_WIDE_INT tmp = ct;
- ct = cf;
- cf = tmp;
- diff = ct - cf;
- }
- tmp = emit_store_flag (tmp, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, -1);
- }
-
- if (diff == 1)
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * [addl dest, ct]
- *
- * Size 5 - 8.
- */
- if (ct)
- tmp = expand_simple_binop (mode, PLUS,
- tmp, GEN_INT (ct),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
- else if (cf == -1)
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * orl $ct, dest
- *
- * Size 8.
- */
- tmp = expand_simple_binop (mode, IOR,
- tmp, GEN_INT (ct),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
- else if (diff == -1 && ct)
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * notl dest
- * [addl dest, cf]
- *
- * Size 8 - 11.
- */
- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
- if (cf)
- tmp = expand_simple_binop (mode, PLUS,
- copy_rtx (tmp), GEN_INT (cf),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
- else
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * [notl dest]
- * andl cf - ct, dest
- * [addl dest, ct]
- *
- * Size 8 - 11.
- */
-
- if (cf == 0)
- {
- cf = ct;
- ct = 0;
- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
- }
-
- tmp = expand_simple_binop (mode, AND,
- copy_rtx (tmp),
- gen_int_mode (cf - ct, mode),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- if (ct)
- tmp = expand_simple_binop (mode, PLUS,
- copy_rtx (tmp), GEN_INT (ct),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
-
- if (!rtx_equal_p (tmp, out))
- emit_move_insn (copy_rtx (out), copy_rtx (tmp));
-
- return 1; /* DONE */
- }
-
- if (diff < 0)
- {
- HOST_WIDE_INT tmp;
- tmp = ct, ct = cf, cf = tmp;
- diff = -diff;
- if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
- {
-	      /* We may be reversing an unordered compare to a normal compare,
-		 which is not valid in general (we might convert a non-trapping
-		 condition into a trapping one); however, on i386 we currently
-		 emit all comparisons unordered.  */
- compare_code = reverse_condition_maybe_unordered (compare_code);
- code = reverse_condition_maybe_unordered (code);
- }
- else
- {
- compare_code = reverse_condition (compare_code);
- code = reverse_condition (code);
- }
- }
-
- compare_code = UNKNOWN;
- if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
- && GET_CODE (ix86_compare_op1) == CONST_INT)
- {
- if (ix86_compare_op1 == const0_rtx
- && (code == LT || code == GE))
- compare_code = code;
- else if (ix86_compare_op1 == constm1_rtx)
- {
- if (code == LE)
- compare_code = LT;
- else if (code == GT)
- compare_code = GE;
- }
- }
-
- /* Optimize dest = (op0 < 0) ? -1 : cf. */
- if (compare_code != UNKNOWN
- && GET_MODE (ix86_compare_op0) == GET_MODE (out)
- && (cf == -1 || ct == -1))
- {
- /* If lea code below could be used, only optimize
- if it results in a 2 insn sequence. */
-
- if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
- || diff == 3 || diff == 5 || diff == 9)
- || (compare_code == LT && ct == -1)
- || (compare_code == GE && cf == -1))
- {
- /*
- * notl op1 (if necessary)
- * sarl $31, op1
- * orl cf, op1
- */
- if (ct != -1)
- {
- cf = ct;
- ct = -1;
- code = reverse_condition (code);
- }
-
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, -1);
-
- out = expand_simple_binop (mode, IOR,
- out, GEN_INT (cf),
- out, 1, OPTAB_DIRECT);
- if (out != operands[0])
- emit_move_insn (operands[0], out);
-
- return 1; /* DONE */
- }
- }
-
-
- if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
- || diff == 3 || diff == 5 || diff == 9)
- && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
- && (mode != DImode
- || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
- {
- /*
- * xorl dest,dest
- * cmpl op1,op2
- * setcc dest
- * lea cf(dest*(ct-cf)),dest
- *
- * Size 14.
- *
- * This also catches the degenerate setcc-only case.
- */
-
- rtx tmp;
- int nops;
-
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, 1);
-
- nops = 0;
-      /* On x86_64 the lea instruction operates on Pmode, so we need to
-	 get the arithmetic done in the proper mode to match.  */
- if (diff == 1)
- tmp = copy_rtx (out);
- else
- {
- rtx out1;
- out1 = copy_rtx (out);
- tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
- nops++;
- if (diff & 1)
- {
- tmp = gen_rtx_PLUS (mode, tmp, out1);
- nops++;
- }
- }
- if (cf != 0)
- {
- tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
- nops++;
- }
- if (!rtx_equal_p (tmp, out))
- {
- if (nops == 1)
- out = force_operand (tmp, copy_rtx (out));
- else
- emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
- }
- if (!rtx_equal_p (out, operands[0]))
- emit_move_insn (operands[0], copy_rtx (out));
-
- return 1; /* DONE */
- }
-
- /*
- * General case: Jumpful:
- * xorl dest,dest cmpl op1, op2
- * cmpl op1, op2 movl ct, dest
- * setcc dest jcc 1f
- * decl dest movl cf, dest
- * andl (cf-ct),dest 1:
- * addl ct,dest
- *
- * Size 20. Size 14.
- *
- * This is reasonably steep, but branch mispredict costs are
- * high on modern cpus, so consider failing only if optimizing
- * for space.
- */
-
- if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- && BRANCH_COST >= 2)
- {
- if (cf == 0)
- {
- cf = ct;
- ct = 0;
- if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
-	      /* We may be reversing an unordered compare to a normal compare,
-		 which is not valid in general (we might convert a non-trapping
-		 condition into a trapping one); however, on i386 we currently
-		 emit all comparisons unordered.  */
- code = reverse_condition_maybe_unordered (code);
- else
- {
- code = reverse_condition (code);
- if (compare_code != UNKNOWN)
- compare_code = reverse_condition (compare_code);
- }
- }
-
- if (compare_code != UNKNOWN)
- {
- /* notl op1 (if needed)
- sarl $31, op1
- andl (cf-ct), op1
- addl ct, op1
-
- For x < 0 (resp. x <= -1) there will be no notl,
- so if possible swap the constants to get rid of the
- complement.
- True/false will be -1/0 while code below (store flag
- followed by decrement) is 0/-1, so the constants need
- to be exchanged once more. */
-
- if (compare_code == GE || !cf)
- {
- code = reverse_condition (code);
- compare_code = LT;
- }
- else
- {
- HOST_WIDE_INT tmp = cf;
- cf = ct;
- ct = tmp;
- }
-
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, -1);
- }
- else
- {
- out = emit_store_flag (out, code, ix86_compare_op0,
- ix86_compare_op1, VOIDmode, 0, 1);
-
- out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
- copy_rtx (out), 1, OPTAB_DIRECT);
- }
-
- out = expand_simple_binop (mode, AND, copy_rtx (out),
- gen_int_mode (cf - ct, mode),
- copy_rtx (out), 1, OPTAB_DIRECT);
- if (ct)
- out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
- copy_rtx (out), 1, OPTAB_DIRECT);
- if (!rtx_equal_p (out, operands[0]))
- emit_move_insn (operands[0], copy_rtx (out));
-
- return 1; /* DONE */
- }
- }
-
- if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- {
- /* Try a few things more with specific constants and a variable. */
-
- optab op;
- rtx var, orig_out, out, tmp;
-
- if (BRANCH_COST <= 2)
- return 0; /* FAIL */
-
-      /* If one of the two operands is an interesting constant, load a
-	 0/-1 mask using the code above (by recursing) and mask the
-	 variable in with a logical operation.  */
-
- if (GET_CODE (operands[2]) == CONST_INT)
- {
- var = operands[3];
- if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
- operands[3] = constm1_rtx, op = and_optab;
- else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
- operands[3] = const0_rtx, op = ior_optab;
- else
- return 0; /* FAIL */
- }
- else if (GET_CODE (operands[3]) == CONST_INT)
- {
- var = operands[2];
- if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
- operands[2] = constm1_rtx, op = and_optab;
-	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
- operands[2] = const0_rtx, op = ior_optab;
- else
- return 0; /* FAIL */
- }
- else
- return 0; /* FAIL */
-
- orig_out = operands[0];
- tmp = gen_reg_rtx (mode);
- operands[0] = tmp;
-
- /* Recurse to get the constant loaded. */
- if (ix86_expand_int_movcc (operands) == 0)
- return 0; /* FAIL */
-
- /* Mask in the interesting variable. */
- out = expand_binop (mode, op, var, tmp, orig_out, 0,
- OPTAB_WIDEN);
- if (!rtx_equal_p (out, orig_out))
- emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
-
- return 1; /* DONE */
- }
-
- /*
- * For comparison with above,
- *
- * movl cf,dest
- * movl ct,tmp
- * cmpl op1,op2
- * cmovcc tmp,dest
- *
- * Size 15.
- */
-
- if (! nonimmediate_operand (operands[2], mode))
- operands[2] = force_reg (mode, operands[2]);
- if (! nonimmediate_operand (operands[3], mode))
- operands[3] = force_reg (mode, operands[3]);
-
- if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
- {
- rtx tmp = gen_reg_rtx (mode);
- emit_move_insn (tmp, operands[3]);
- operands[3] = tmp;
- }
- if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
- {
- rtx tmp = gen_reg_rtx (mode);
- emit_move_insn (tmp, operands[2]);
- operands[2] = tmp;
- }
-
- if (! register_operand (operands[2], VOIDmode)
- && (mode == QImode
- || ! register_operand (operands[3], VOIDmode)))
- operands[2] = force_reg (mode, operands[2]);
-
- if (mode == QImode
- && ! register_operand (operands[3], VOIDmode))
- operands[3] = force_reg (mode, operands[3]);
-
- emit_insn (compare_seq);
- emit_insn (gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_IF_THEN_ELSE (mode,
- compare_op, operands[2],
- operands[3])));
- if (bypass_test)
- emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
- gen_rtx_IF_THEN_ELSE (mode,
- bypass_test,
- copy_rtx (operands[3]),
- copy_rtx (operands[0]))));
- if (second_test)
- emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
- gen_rtx_IF_THEN_ELSE (mode,
- second_test,
- copy_rtx (operands[2]),
- copy_rtx (operands[0]))));
-
- return 1; /* DONE */
-}
-
-/* Swap, force into registers, or otherwise massage the two operands
- to an sse comparison with a mask result. Thus we differ a bit from
- ix86_prepare_fp_compare_args which expects to produce a flags result.
-
- The DEST operand exists to help determine whether to commute commutative
- operators. The POP0/POP1 operands are updated in place. The new
- comparison code is returned, or UNKNOWN if not implementable. */
-
-static enum rtx_code
-ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
- rtx *pop0, rtx *pop1)
-{
- rtx tmp;
-
- switch (code)
- {
- case LTGT:
- case UNEQ:
- /* We have no LTGT as an operator. We could implement it with
- NE & ORDERED, but this requires an extra temporary. It's
- not clear that it's worth it. */
- return UNKNOWN;
-
- case LT:
- case LE:
- case UNGT:
- case UNGE:
- /* These are supported directly. */
- break;
-
- case EQ:
- case NE:
- case UNORDERED:
- case ORDERED:
- /* For commutative operators, try to canonicalize the destination
- operand to be first in the comparison - this helps reload to
- avoid extra moves. */
- if (!dest || !rtx_equal_p (dest, *pop1))
- break;
- /* FALLTHRU */
-
- case GE:
- case GT:
- case UNLE:
- case UNLT:
- /* These are not supported directly. Swap the comparison operands
- to transform into something that is supported. */
- tmp = *pop0;
- *pop0 = *pop1;
- *pop1 = tmp;
- code = swap_condition (code);
- break;
-
- default:
- gcc_unreachable ();
- }
-
- return code;
-}
-
-/* Detect conditional moves that exactly match min/max operational
- semantics. Note that this is IEEE safe, as long as we don't
- interchange the operands.
-
- Returns FALSE if this conditional move doesn't match a MIN/MAX,
- and TRUE if the operation is successful and instructions are emitted. */
-
-static bool
-ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
- rtx cmp_op1, rtx if_true, rtx if_false)
-{
- enum machine_mode mode;
- bool is_min;
- rtx tmp;
-
- if (code == LT)
- ;
- else if (code == UNGE)
- {
- tmp = if_true;
- if_true = if_false;
- if_false = tmp;
- }
- else
- return false;
-
- if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
- is_min = true;
- else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
- is_min = false;
- else
- return false;
-
- mode = GET_MODE (dest);
-
- /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
- but MODE may be a vector mode and thus not appropriate. */
- if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
- {
- int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
- rtvec v;
-
- if_true = force_reg (mode, if_true);
- v = gen_rtvec (2, if_true, if_false);
- tmp = gen_rtx_UNSPEC (mode, v, u);
- }
- else
- {
- code = is_min ? SMIN : SMAX;
- tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
- return true;
-}
-
-/* Expand an sse vector comparison. Return the register with the result. */
-
-static rtx
-ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
- rtx op_true, rtx op_false)
-{
- enum machine_mode mode = GET_MODE (dest);
- rtx x;
-
- cmp_op0 = force_reg (mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, mode))
- cmp_op1 = force_reg (mode, cmp_op1);
-
- if (optimize
- || reg_overlap_mentioned_p (dest, op_true)
- || reg_overlap_mentioned_p (dest, op_false))
- dest = gen_reg_rtx (mode);
-
- x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
- return dest;
-}
-
-/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
- operations. This is used for both scalar and vector conditional moves. */
-
-static void
-ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
-{
- enum machine_mode mode = GET_MODE (dest);
- rtx t2, t3, x;
-
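-  /* The general case computes dest = (cmp AND op_true) IOR
-     (NOT cmp AND op_false); when one of the arms is zero a single
-     AND suffices.  */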
- if (op_false == CONST0_RTX (mode))
- {
- op_true = force_reg (mode, op_true);
- x = gen_rtx_AND (mode, cmp, op_true);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else if (op_true == CONST0_RTX (mode))
- {
- op_false = force_reg (mode, op_false);
- x = gen_rtx_NOT (mode, cmp);
- x = gen_rtx_AND (mode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
- else
- {
- op_true = force_reg (mode, op_true);
- op_false = force_reg (mode, op_false);
-
- t2 = gen_reg_rtx (mode);
- if (optimize)
- t3 = gen_reg_rtx (mode);
- else
- t3 = dest;
-
- x = gen_rtx_AND (mode, op_true, cmp);
- emit_insn (gen_rtx_SET (VOIDmode, t2, x));
-
- x = gen_rtx_NOT (mode, cmp);
- x = gen_rtx_AND (mode, x, op_false);
- emit_insn (gen_rtx_SET (VOIDmode, t3, x));
-
- x = gen_rtx_IOR (mode, t3, t2);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
- }
-}
-
-/* Expand a floating-point conditional move. Return true if successful. */
-
-int
-ix86_expand_fp_movcc (rtx operands[])
-{
- enum machine_mode mode = GET_MODE (operands[0]);
- enum rtx_code code = GET_CODE (operands[1]);
- rtx tmp, compare_op, second_test, bypass_test;
-
- if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
- {
- enum machine_mode cmode;
-
-      /* Since we have no cmove for sse registers, don't force bad register
-	 allocation just to gain access to it.  Deny movcc when the
-	 comparison mode doesn't match the move mode.  */
- cmode = GET_MODE (ix86_compare_op0);
- if (cmode == VOIDmode)
- cmode = GET_MODE (ix86_compare_op1);
- if (cmode != mode)
- return 0;
-
- code = ix86_prepare_sse_fp_compare_args (operands[0], code,
- &ix86_compare_op0,
- &ix86_compare_op1);
- if (code == UNKNOWN)
- return 0;
-
- if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
- ix86_compare_op1, operands[2],
- operands[3]))
- return 1;
-
- tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
- ix86_compare_op1, operands[2], operands[3]);
- ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
- return 1;
- }
-
- /* The floating point conditional move instructions don't directly
- support conditions resulting from a signed integer comparison. */
-
- compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
-
- if (!fcmov_comparison_operator (compare_op, VOIDmode))
- {
- gcc_assert (!second_test && !bypass_test);
- tmp = gen_reg_rtx (QImode);
- ix86_expand_setcc (code, tmp);
- code = NE;
- ix86_compare_op0 = tmp;
- ix86_compare_op1 = const0_rtx;
- compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
- }
- if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
- {
- tmp = gen_reg_rtx (mode);
- emit_move_insn (tmp, operands[3]);
- operands[3] = tmp;
- }
- if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
- {
- tmp = gen_reg_rtx (mode);
- emit_move_insn (tmp, operands[2]);
- operands[2] = tmp;
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_IF_THEN_ELSE (mode, compare_op,
- operands[2], operands[3])));
- if (bypass_test)
- emit_insn (gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_IF_THEN_ELSE (mode, bypass_test,
- operands[3], operands[0])));
- if (second_test)
- emit_insn (gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_IF_THEN_ELSE (mode, second_test,
- operands[2], operands[0])));
-
- return 1;
-}
-
-/* Expand a floating-point vector conditional move; a vcond operation
- rather than a movcc operation. */
-
-bool
-ix86_expand_fp_vcond (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[3]);
- rtx cmp;
-
- code = ix86_prepare_sse_fp_compare_args (operands[0], code,
- &operands[4], &operands[5]);
- if (code == UNKNOWN)
- return false;
-
- if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
- operands[5], operands[1], operands[2]))
- return true;
-
- cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
- operands[1], operands[2]);
- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
- return true;
-}
-
-/* Expand a signed integral vector conditional move. */
-
-bool
-ix86_expand_int_vcond (rtx operands[])
-{
- enum machine_mode mode = GET_MODE (operands[0]);
- enum rtx_code code = GET_CODE (operands[3]);
- bool negate = false;
- rtx x, cop0, cop1;
-
- cop0 = operands[4];
- cop1 = operands[5];
-
- /* Canonicalize the comparison to EQ, GT, GTU. */
- switch (code)
- {
- case EQ:
- case GT:
- case GTU:
- break;
-
- case NE:
- case LE:
- case LEU:
- code = reverse_condition (code);
- negate = true;
- break;
-
- case GE:
- case GEU:
- code = reverse_condition (code);
- negate = true;
- /* FALLTHRU */
-
- case LT:
- case LTU:
- code = swap_condition (code);
- x = cop0, cop0 = cop1, cop1 = x;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* Only SSE4.1/SSE4.2 supports V2DImode. */
- if (mode == V2DImode)
- {
- switch (code)
- {
- case EQ:
- /* SSE4.1 supports EQ. */
- if (!TARGET_SSE4_1)
- return false;
- break;
-
- case GT:
- case GTU:
- /* SSE4.2 supports GT/GTU. */
- if (!TARGET_SSE4_2)
- return false;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- /* Unsigned parallel compare is not supported by the hardware. Play some
- tricks to turn this into a signed comparison against 0. */
- if (code == GTU)
- {
- cop0 = force_reg (mode, cop0);
-
- switch (mode)
- {
- case V4SImode:
- {
- rtx t1, t2, mask;
-
- /* Perform a parallel modulo subtraction. */
- t1 = gen_reg_rtx (mode);
- emit_insn (gen_subv4si3 (t1, cop0, cop1));
-
- /* Extract the original sign bit of op0. */
- mask = GEN_INT (-0x80000000);
- mask = gen_rtx_CONST_VECTOR (mode,
- gen_rtvec (4, mask, mask, mask, mask));
- mask = force_reg (mode, mask);
- t2 = gen_reg_rtx (mode);
- emit_insn (gen_andv4si3 (t2, cop0, mask));
-
- /* XOR it back into the result of the subtraction. This results
- in the sign bit set iff we saw unsigned underflow. */
- x = gen_reg_rtx (mode);
- emit_insn (gen_xorv4si3 (x, t1, t2));
-
- code = GT;
- }
- break;
-
- case V16QImode:
- case V8HImode:
- /* Perform a parallel unsigned saturating subtraction. */
- x = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, x,
- gen_rtx_US_MINUS (mode, cop0, cop1)));
-
- code = EQ;
- negate = !negate;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- cop0 = x;
- cop1 = CONST0_RTX (mode);
- }
-
- x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
- operands[1+negate], operands[2-negate]);
-
- ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
- operands[2-negate]);
- return true;
-}
-
-/* APPLE LOCAL begin 5612787 mainline sse4 */
-/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
- true if we should do zero extension, else sign extension. HIGH_P is
- true if we want the N/2 high elements, else the low elements. */
-
-void
-ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
- enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx, rtx);
- rtx se, dest;
-
- switch (imode)
- {
- case V16QImode:
- if (high_p)
- unpack = gen_vec_interleave_highv16qi;
- else
- unpack = gen_vec_interleave_lowv16qi;
- break;
- case V8HImode:
- if (high_p)
- unpack = gen_vec_interleave_highv8hi;
- else
- unpack = gen_vec_interleave_lowv8hi;
- break;
- case V4SImode:
- if (high_p)
- unpack = gen_vec_interleave_highv4si;
- else
- unpack = gen_vec_interleave_lowv4si;
- break;
- default:
- gcc_unreachable ();
- }
-
- dest = gen_lowpart (imode, operands[0]);
-
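-  /* The second interleave operand supplies the extension bits: zero for
-     zero extension, or the element-wise result of 0 > operands[1] (all
-     ones where the element is negative) for sign extension.  */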
- if (unsigned_p)
- se = force_reg (imode, CONST0_RTX (imode));
- else
- se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
- operands[1], pc_rtx, pc_rtx);
-
- emit_insn (unpack (dest, operands[1], se));
-}
-
-/* This function performs the same task as ix86_expand_sse_unpack,
- but with SSE4.1 instructions. */
-
-void
-ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
- enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx);
- rtx src, dest;
-
- switch (imode)
- {
- case V16QImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv8qiv8hi2;
- else
- unpack = gen_sse4_1_extendv8qiv8hi2;
- break;
- case V8HImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv4hiv4si2;
- else
- unpack = gen_sse4_1_extendv4hiv4si2;
- break;
- case V4SImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv2siv2di2;
- else
- unpack = gen_sse4_1_extendv2siv2di2;
- break;
- default:
- gcc_unreachable ();
- }
-
- dest = operands[0];
- if (high_p)
- {
- /* Shift higher 8 bytes to lower 8 bytes. */
- src = gen_reg_rtx (imode);
- emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
- gen_lowpart (TImode, operands[1]),
- GEN_INT (64)));
- }
- else
- src = operands[1];
-
- emit_insn (unpack (dest, src));
-}
-/* APPLE LOCAL end 5612787 mainline sse4 */
-
-/* Expand conditional increment or decrement using adc/sbb instructions.
- The default case using setcc followed by the conditional move can be
- done by generic code. */
-int
-ix86_expand_int_addcc (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[1]);
- rtx compare_op;
- rtx val = const0_rtx;
- bool fpcmp = false;
- enum machine_mode mode = GET_MODE (operands[0]);
-
- if (operands[3] != const1_rtx
- && operands[3] != constm1_rtx)
- return 0;
- if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
- ix86_compare_op1, &compare_op))
- return 0;
- code = GET_CODE (compare_op);
-
- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
- || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
- {
- fpcmp = true;
- code = ix86_fp_compare_code_to_integer (code);
- }
-
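-  /* CODE is now known to be LTU or GEU: the condition holds when the
-     carry flag is set (LTU) or clear (GEU).  */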
- if (code != LTU)
- {
- val = constm1_rtx;
- if (fpcmp)
- PUT_CODE (compare_op,
- reverse_condition_maybe_unordered
- (GET_CODE (compare_op)));
- else
- PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
- }
- PUT_MODE (compare_op, mode);
-
- /* Construct either adc or sbb insn. */
- if ((code == LTU) == (operands[3] == constm1_rtx))
- {
- switch (GET_MODE (operands[0]))
- {
- case QImode:
- emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
- break;
- case HImode:
- emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
- break;
- case SImode:
- emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
- break;
- case DImode:
- emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
- break;
- default:
- gcc_unreachable ();
- }
- }
- else
- {
- switch (GET_MODE (operands[0]))
- {
- case QImode:
- emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
- break;
- case HImode:
- emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
- break;
- case SImode:
- emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
- break;
- case DImode:
- emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
- break;
- default:
- gcc_unreachable ();
- }
- }
- return 1; /* DONE */
-}
-
-
-/* Split OPERAND into SImode parts stored in PARTS.  Similar to split_di,
-   but works for floating point parameters and non-offsettable memories.
-   For pushes, it returns just stack offsets; the values will be saved
-   in the right order.  At most three parts are generated.  */
-
-static int
-ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
-{
- int size;
-
- if (!TARGET_64BIT)
- size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
- else
- size = (GET_MODE_SIZE (mode) + 4) / 8;
-
- gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
- gcc_assert (size >= 2 && size <= 3);
-
- /* Optimize constant pool reference to immediates. This is used by fp
- moves, that force all constants to memory to allow combining. */
- if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
- {
- rtx tmp = maybe_get_pool_constant (operand);
- if (tmp)
- operand = tmp;
- }
-
- if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
- {
-      /* The only non-offsettable memories we handle are pushes.  */
- int ok = push_operand (operand, VOIDmode);
-
- gcc_assert (ok);
-
- operand = copy_rtx (operand);
- PUT_MODE (operand, Pmode);
- parts[0] = parts[1] = parts[2] = operand;
- return size;
- }
-
- if (GET_CODE (operand) == CONST_VECTOR)
- {
- enum machine_mode imode = int_mode_for_mode (mode);
- /* Caution: if we looked through a constant pool memory above,
- the operand may actually have a different mode now. That's
- ok, since we want to pun this all the way back to an integer. */
- operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
- gcc_assert (operand != NULL);
- mode = imode;
- }
-
- if (!TARGET_64BIT)
- {
- if (mode == DImode)
- split_di (&operand, 1, &parts[0], &parts[1]);
- else
- {
- if (REG_P (operand))
- {
- gcc_assert (reload_completed);
- parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
- parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
- if (size == 3)
- parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
- }
- else if (offsettable_memref_p (operand))
- {
- operand = adjust_address (operand, SImode, 0);
- parts[0] = operand;
- parts[1] = adjust_address (operand, SImode, 4);
- if (size == 3)
- parts[2] = adjust_address (operand, SImode, 8);
- }
- else if (GET_CODE (operand) == CONST_DOUBLE)
- {
- REAL_VALUE_TYPE r;
- long l[4];
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
- switch (mode)
- {
- case XFmode:
- REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
- parts[2] = gen_int_mode (l[2], SImode);
- break;
- case DFmode:
- REAL_VALUE_TO_TARGET_DOUBLE (r, l);
- break;
- default:
- gcc_unreachable ();
- }
- parts[1] = gen_int_mode (l[1], SImode);
- parts[0] = gen_int_mode (l[0], SImode);
- }
- else
- gcc_unreachable ();
- }
- }
- else
- {
- if (mode == TImode)
- split_ti (&operand, 1, &parts[0], &parts[1]);
- if (mode == XFmode || mode == TFmode)
- {
- enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
- if (REG_P (operand))
- {
- gcc_assert (reload_completed);
- parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
- parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
- }
- else if (offsettable_memref_p (operand))
- {
- operand = adjust_address (operand, DImode, 0);
- parts[0] = operand;
- parts[1] = adjust_address (operand, upper_mode, 8);
- }
- else if (GET_CODE (operand) == CONST_DOUBLE)
- {
- REAL_VALUE_TYPE r;
- long l[4];
-
- REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
- real_to_target (l, &r, mode);
-
- /* Do not use shift by 32 to avoid warning on 32bit systems. */
- if (HOST_BITS_PER_WIDE_INT >= 64)
- parts[0]
- = gen_int_mode
- ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
- + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
- DImode);
- else
- parts[0] = immed_double_const (l[0], l[1], DImode);
-
- if (upper_mode == SImode)
- parts[1] = gen_int_mode (l[2], SImode);
- else if (HOST_BITS_PER_WIDE_INT >= 64)
- parts[1]
- = gen_int_mode
- ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
- + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
- DImode);
- else
- parts[1] = immed_double_const (l[2], l[3], DImode);
- }
- else
- gcc_unreachable ();
- }
- }
-
- return size;
-}
-
-/* Emit insns to perform a move or push of DI, DF, and XF values.
-   Operands 2-4 receive the destination parts and operands 5-7 the
-   source parts, in the order in which the moves must be emitted.  */
-
-void
-ix86_split_long_move (rtx operands[])
-{
- rtx part[2][3];
- int nparts;
- int push = 0;
- int collisions = 0;
- enum machine_mode mode = GET_MODE (operands[0]);
-
-  /* The DFmode expanders may ask us to move a double.
-     For a 64-bit target this is a single move.  By hiding the fact
-     here we simplify the i386.md splitters.  */
- if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
- {
- /* Optimize constant pool reference to immediates. This is used by
- fp moves, that force all constants to memory to allow combining. */
-
- if (GET_CODE (operands[1]) == MEM
- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
- operands[1] = get_pool_constant (XEXP (operands[1], 0));
- if (push_operand (operands[0], VOIDmode))
- {
- operands[0] = copy_rtx (operands[0]);
- PUT_MODE (operands[0], Pmode);
- }
- else
- operands[0] = gen_lowpart (DImode, operands[0]);
- operands[1] = gen_lowpart (DImode, operands[1]);
- emit_move_insn (operands[0], operands[1]);
- return;
- }
-
- /* The only non-offsettable memory we handle is push. */
- if (push_operand (operands[0], VOIDmode))
- push = 1;
- else
- gcc_assert (GET_CODE (operands[0]) != MEM
- || offsettable_memref_p (operands[0]));
-
- nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
- ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
-
- /* When emitting push, take care for source operands on the stack. */
- if (push && GET_CODE (operands[1]) == MEM
- && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
- {
- /* APPLE LOCAL begin 4099768 */
- if (nparts == 3 && TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
- part[1][2] = adjust_address (part[1][2], SImode, 4);
- /* APPLE LOCAL end 4099768 */
- if (nparts == 3)
- part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
- XEXP (part[1][2], 0));
- part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
- XEXP (part[1][1], 0));
- }
-
- /* We need to do copy in the right order in case an address register
- of the source overlaps the destination. */
- if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
- {
- if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
- collisions++;
- if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
- collisions++;
- if (nparts == 3
- && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
- collisions++;
-
- /* Collision in the middle part can be handled by reordering. */
- if (collisions == 1 && nparts == 3
- && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
- {
- rtx tmp;
- tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
- tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
- }
-
-      /* If there are more collisions, we can't handle them by reordering.
-	 Do an lea to the last part and use only one colliding move.  */
- else if (collisions > 1)
- {
- rtx base;
-
- collisions = 1;
-
- base = part[0][nparts - 1];
-
- /* Handle the case when the last part isn't valid for lea.
- Happens in 64-bit mode storing the 12-byte XFmode. */
- if (GET_MODE (base) != Pmode)
- base = gen_rtx_REG (Pmode, REGNO (base));
-
- emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
- part[1][0] = replace_equiv_address (part[1][0], base);
- part[1][1] = replace_equiv_address (part[1][1],
- plus_constant (base, UNITS_PER_WORD));
- if (nparts == 3)
- part[1][2] = replace_equiv_address (part[1][2],
- plus_constant (base, 8));
- }
- }
-
- if (push)
- {
- if (!TARGET_64BIT)
- {
- if (nparts == 3)
- {
- if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
- emit_move_insn (part[0][2], part[1][2]);
- }
- }
- else
- {
-	  /* In 64-bit mode we don't have a 32-bit push available.  If the
-	     part is a register, that is OK - we will just use the larger
-	     counterpart.  We also retype memory - this comes from an
-	     attempt to avoid the REX prefix on moving the second half
-	     of a TFmode value.  */
- if (GET_MODE (part[1][1]) == SImode)
- {
- switch (GET_CODE (part[1][1]))
- {
- case MEM:
- part[1][1] = adjust_address (part[1][1], DImode, 0);
- break;
-
- case REG:
- part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (GET_MODE (part[1][0]) == SImode)
- part[1][0] = part[1][1];
- }
- }
- emit_move_insn (part[0][1], part[1][1]);
- emit_move_insn (part[0][0], part[1][0]);
- return;
- }
-
- /* Choose correct order to not overwrite the source before it is copied. */
- if ((REG_P (part[0][0])
- && REG_P (part[1][1])
- && (REGNO (part[0][0]) == REGNO (part[1][1])
- || (nparts == 3
- && REGNO (part[0][0]) == REGNO (part[1][2]))))
- || (collisions > 0
- && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
- {
- if (nparts == 3)
- {
- operands[2] = part[0][2];
- operands[3] = part[0][1];
- operands[4] = part[0][0];
- operands[5] = part[1][2];
- operands[6] = part[1][1];
- operands[7] = part[1][0];
- }
- else
- {
- operands[2] = part[0][1];
- operands[3] = part[0][0];
- operands[5] = part[1][1];
- operands[6] = part[1][0];
- }
- }
- else
- {
- if (nparts == 3)
- {
- operands[2] = part[0][0];
- operands[3] = part[0][1];
- operands[4] = part[0][2];
- operands[5] = part[1][0];
- operands[6] = part[1][1];
- operands[7] = part[1][2];
- }
- else
- {
- operands[2] = part[0][0];
- operands[3] = part[0][1];
- operands[5] = part[1][0];
- operands[6] = part[1][1];
- }
- }
-
- /* If optimizing for size, attempt to locally unCSE nonzero constants. */
- if (optimize_size)
- {
- if (GET_CODE (operands[5]) == CONST_INT
- && operands[5] != const0_rtx
- && REG_P (operands[2]))
- {
- if (GET_CODE (operands[6]) == CONST_INT
- && INTVAL (operands[6]) == INTVAL (operands[5]))
- operands[6] = operands[2];
-
- if (nparts == 3
- && GET_CODE (operands[7]) == CONST_INT
- && INTVAL (operands[7]) == INTVAL (operands[5]))
- operands[7] = operands[2];
- }
-
- if (nparts == 3
- && GET_CODE (operands[6]) == CONST_INT
- && operands[6] != const0_rtx
- && REG_P (operands[3])
- && GET_CODE (operands[7]) == CONST_INT
- && INTVAL (operands[7]) == INTVAL (operands[6]))
- operands[7] = operands[3];
- }
-
- emit_move_insn (operands[2], operands[5]);
- emit_move_insn (operands[3], operands[6]);
- if (nparts == 3)
- emit_move_insn (operands[4], operands[7]);
-
- return;
-}
-
-/* Helper function of ix86_split_ashl used to generate an SImode/DImode
- left shift by a constant, either using a single shift or
- a sequence of add instructions. */
-
-static void
-ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
-{
- if (count == 1)
- {
- emit_insn ((mode == DImode
- ? gen_addsi3
- : gen_adddi3) (operand, operand, operand));
- }
- else if (!optimize_size
- && count * ix86_cost->add <= ix86_cost->shift_const)
- {
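-      /* COUNT doublings by addition are cheaper than a single shift by
-	 COUNT on this target, so emit an add chain.  */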
- int i;
- for (i=0; i<count; i++)
- {
- emit_insn ((mode == DImode
- ? gen_addsi3
- : gen_adddi3) (operand, operand, operand));
- }
- }
- else
- emit_insn ((mode == DImode
- ? gen_ashlsi3
- : gen_ashldi3) (operand, operand, GEN_INT (count)));
-}
-
-void
-ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
-{
- rtx low[2], high[2];
- int count;
- const int single_width = mode == DImode ? 32 : 64;
-
- if (GET_CODE (operands[2]) == CONST_INT)
- {
- (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
- count = INTVAL (operands[2]) & (single_width * 2 - 1);
-
- if (count >= single_width)
- {
- emit_move_insn (high[0], low[1]);
- emit_move_insn (low[0], const0_rtx);
-
- if (count > single_width)
- ix86_expand_ashl_const (high[0], count - single_width, mode);
- }
- else
- {
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
- emit_insn ((mode == DImode
- ? gen_x86_shld_1
- : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
- ix86_expand_ashl_const (low[0], count, mode);
- }
- return;
- }
-
- (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
-
- if (operands[1] == const1_rtx)
- {
-      /* Assuming we've chosen QImode capable registers, then 1 << N
-	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
- if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
- {
- rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
-
- ix86_expand_clear (low[0]);
- ix86_expand_clear (high[0]);
- emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
-
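-	  /* The Z flag now reflects bit 5 (resp. 6) of the count: if it is
-	     clear, the shift amount is below the word width and the 1 lands
-	     in the low word; otherwise it lands in the high word.  */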
- d = gen_lowpart (QImode, low[0]);
- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
- s = gen_rtx_EQ (QImode, flags, const0_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, d, s));
-
- d = gen_lowpart (QImode, high[0]);
- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
- s = gen_rtx_NE (QImode, flags, const0_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, d, s));
- }
-
- /* Otherwise, we can get the same results by manually performing
- a bit extract operation on bit 5/6, and then performing the two
- shifts. The two methods of getting 0/1 into low/high are exactly
- the same size. Avoiding the shift in the bit extract case helps
- pentium4 a bit; no one else seems to care much either way. */
- else
- {
- rtx x;
-
- if (TARGET_PARTIAL_REG_STALL && !optimize_size)
- x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
- else
- x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
- emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
-
- emit_insn ((mode == DImode
- ? gen_lshrsi3
- : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
- emit_insn ((mode == DImode
- ? gen_andsi3
- : gen_anddi3) (high[0], high[0], GEN_INT (1)));
- emit_move_insn (low[0], high[0]);
- emit_insn ((mode == DImode
- ? gen_xorsi3
- : gen_xordi3) (low[0], low[0], GEN_INT (1)));
- }
-
- emit_insn ((mode == DImode
- ? gen_ashlsi3
- : gen_ashldi3) (low[0], low[0], operands[2]));
- emit_insn ((mode == DImode
- ? gen_ashlsi3
- : gen_ashldi3) (high[0], high[0], operands[2]));
- return;
- }
-
- if (operands[1] == constm1_rtx)
- {
- /* For -1 << N, we can avoid the shld instruction, because we
- know that we're shifting 0...31/63 ones into a -1. */
- emit_move_insn (low[0], constm1_rtx);
- if (optimize_size)
- emit_move_insn (high[0], low[0]);
- else
- emit_move_insn (high[0], constm1_rtx);
- }
- else
- {
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
- emit_insn ((mode == DImode
- ? gen_x86_shld_1
- : gen_x86_64_shld) (high[0], low[0], operands[2]));
- }
-
- emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
-
- if (TARGET_CMOVE && scratch)
- {
- ix86_expand_clear (scratch);
- emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
- }
- else
- emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
-}
-
-void
-ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
-{
- rtx low[2], high[2];
- int count;
- const int single_width = mode == DImode ? 32 : 64;
-
- if (GET_CODE (operands[2]) == CONST_INT)
- {
- (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
- count = INTVAL (operands[2]) & (single_width * 2 - 1);
-
- if (count == single_width * 2 - 1)
- {
- emit_move_insn (high[0], high[1]);
- emit_insn ((mode == DImode
- ? gen_ashrsi3
- : gen_ashrdi3) (high[0], high[0],
- GEN_INT (single_width - 1)));
- emit_move_insn (low[0], high[0]);
-
- }
- else if (count >= single_width)
- {
- emit_move_insn (low[0], high[1]);
- emit_move_insn (high[0], low[0]);
- emit_insn ((mode == DImode
- ? gen_ashrsi3
- : gen_ashrdi3) (high[0], high[0],
- GEN_INT (single_width - 1)));
- if (count > single_width)
- emit_insn ((mode == DImode
- ? gen_ashrsi3
- : gen_ashrdi3) (low[0], low[0],
- GEN_INT (count - single_width)));
- }
- else
- {
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
- emit_insn ((mode == DImode
- ? gen_x86_shrd_1
- : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
- emit_insn ((mode == DImode
- ? gen_ashrsi3
- : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
- }
- }
- else
- {
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
-
- emit_insn ((mode == DImode
- ? gen_x86_shrd_1
- : gen_x86_64_shrd) (low[0], high[0], operands[2]));
- emit_insn ((mode == DImode
- ? gen_ashrsi3
- : gen_ashrdi3) (high[0], high[0], operands[2]));
-
- if (TARGET_CMOVE && scratch)
- {
- emit_move_insn (scratch, high[0]);
- emit_insn ((mode == DImode
- ? gen_ashrsi3
- : gen_ashrdi3) (scratch, scratch,
- GEN_INT (single_width - 1)));
- emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
- scratch));
- }
- else
- emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
- }
-}
-
-void
-ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
-{
- rtx low[2], high[2];
- int count;
- const int single_width = mode == DImode ? 32 : 64;
-
- if (GET_CODE (operands[2]) == CONST_INT)
- {
- (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
- count = INTVAL (operands[2]) & (single_width * 2 - 1);
-
- if (count >= single_width)
- {
- emit_move_insn (low[0], high[1]);
- ix86_expand_clear (high[0]);
-
- if (count > single_width)
- emit_insn ((mode == DImode
- ? gen_lshrsi3
- : gen_lshrdi3) (low[0], low[0],
- GEN_INT (count - single_width)));
- }
- else
- {
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
- emit_insn ((mode == DImode
- ? gen_x86_shrd_1
- : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
- emit_insn ((mode == DImode
- ? gen_lshrsi3
- : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
- }
- }
- else
- {
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
-
- emit_insn ((mode == DImode
- ? gen_x86_shrd_1
- : gen_x86_64_shrd) (low[0], high[0], operands[2]));
- emit_insn ((mode == DImode
- ? gen_lshrsi3
- : gen_lshrdi3) (high[0], high[0], operands[2]));
-
- /* Heh. By reversing the arguments, we can reuse this pattern. */
- if (TARGET_CMOVE && scratch)
- {
- ix86_expand_clear (scratch);
- emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
- scratch));
- }
- else
- emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
- }
-}
-
-/* Helper function for the string operations below.  Test whether
-   (VARIABLE & VALUE) is zero - i.e. whether VARIABLE is suitably
-   aligned - and if so, jump to the returned label.  */
-static rtx
-ix86_expand_aligntest (rtx variable, int value)
-{
- rtx label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
- if (GET_MODE (variable) == DImode)
- emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
- else
- emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
- emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
- 1, label);
- return label;
-}
-
-/* Subtract VALUE from COUNTREG.  */
-static void
-ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
-{
- if (GET_MODE (countreg) == DImode)
- emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
- else
- emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
-}
-
-/* Zero extend the possibly SImode EXP to a Pmode register.  */
-rtx
-ix86_zero_extend_to_Pmode (rtx exp)
-{
- rtx r;
- if (GET_MODE (exp) == VOIDmode)
- return force_reg (Pmode, exp);
- if (GET_MODE (exp) == Pmode)
- return copy_to_mode_reg (Pmode, exp);
- r = gen_reg_rtx (Pmode);
- emit_insn (gen_zero_extendsidi2 (r, exp));
- return r;
-}
-
-/* Expand string move (memcpy) operation. Use i386 string operations when
- profitable. expand_clrmem contains similar code. */
-int
-ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
-{
- rtx srcreg, destreg, countreg, srcexp, destexp;
- enum machine_mode counter_mode;
- HOST_WIDE_INT align = 0;
- unsigned HOST_WIDE_INT count = 0;
-
- if (GET_CODE (align_exp) == CONST_INT)
- align = INTVAL (align_exp);
-
- /* Can't use any of this if the user has appropriated esi or edi. */
- if (global_regs[4] || global_regs[5])
- return 0;
-
- /* This simple hack avoids all inlining code and simplifies code below. */
- if (!TARGET_ALIGN_STRINGOPS)
- align = 64;
-
- if (GET_CODE (count_exp) == CONST_INT)
- {
- count = INTVAL (count_exp);
- if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
- return 0;
- }
-
-  /* Figure out the proper mode for the counter.  For 32-bit targets it is
-     always SImode; for 64-bit targets use SImode when possible, otherwise
-     DImode.  COUNT was set above to the number of bytes copied when known
-     at compile time.  */
- if (!TARGET_64BIT
- || GET_MODE (count_exp) == SImode
- || x86_64_zext_immediate_operand (count_exp, VOIDmode))
- counter_mode = SImode;
- else
- counter_mode = DImode;
-
- gcc_assert (counter_mode == SImode || counter_mode == DImode);
-
- destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
- if (destreg != XEXP (dst, 0))
- dst = replace_equiv_address_nv (dst, destreg);
- srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
- if (srcreg != XEXP (src, 0))
- src = replace_equiv_address_nv (src, srcreg);
-
-  /* When optimizing for size, emit a simple rep ; movsb instruction for
-     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
-     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
-     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
-     count / 4 + (count & 3), while the other sequence is either 4 or 7
-     bytes, but we don't know whether the upper 24 (resp. 56) bits of
-     %ecx will be known to be zero or not.  The rep; movsb sequence causes
-     higher register pressure though, so take that into account.  */
-
- if ((!optimize || optimize_size)
- && (count == 0
- || ((count & 0x03)
- && (!optimize_size
- || count > 5 * 4
- || (count & 3) + count / 4 > 6))))
- {
- emit_insn (gen_cld ());
- countreg = ix86_zero_extend_to_Pmode (count_exp);
- destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
- srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
- emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
- destexp, srcexp));
- }
-
- /* For constant aligned (or small unaligned) copies use rep movsl
- followed by code copying the rest. For PentiumPro ensure 8 byte
- alignment to allow rep movsl acceleration. */
-
- else if (count != 0
- && (align >= 8
- || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
- || optimize_size || count < (unsigned int) 64))
- {
- unsigned HOST_WIDE_INT offset = 0;
- int size = TARGET_64BIT && !optimize_size ? 8 : 4;
- rtx srcmem, dstmem;
-
- emit_insn (gen_cld ());
- if (count & ~(size - 1))
- {
- if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
- {
- enum machine_mode movs_mode = size == 4 ? SImode : DImode;
-
- while (offset < (count & ~(size - 1)))
- {
- srcmem = adjust_automodify_address_nv (src, movs_mode,
- srcreg, offset);
- dstmem = adjust_automodify_address_nv (dst, movs_mode,
- destreg, offset);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- offset += size;
- }
- }
- else
- {
- countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
- & (TARGET_64BIT ? -1 : 0x3fffffff));
- countreg = copy_to_mode_reg (counter_mode, countreg);
- countreg = ix86_zero_extend_to_Pmode (countreg);
-
- destexp = gen_rtx_ASHIFT (Pmode, countreg,
- GEN_INT (size == 4 ? 2 : 3));
- srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
- destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
-
- emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
- countreg, destexp, srcexp));
- offset = count & ~(size - 1);
- }
- }
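-      /* Copy the remaining tail: an SImode chunk when doing DImode moves,
-	 then HImode and QImode pieces as needed.  */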
- if (size == 8 && (count & 0x04))
- {
- srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
- offset);
- dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
- offset);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- offset += 4;
- }
- if (count & 0x02)
- {
- srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
- offset);
- dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
- offset);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- offset += 2;
- }
- if (count & 0x01)
- {
- srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
- offset);
- dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
- offset);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- }
- }
- /* The generic code based on the glibc implementation:
- - align destination to 4 bytes (8 byte alignment is used for PentiumPro
- allowing accelerated copying there)
- - copy the data using rep movsl
- - copy the rest. */
- else
- {
- rtx countreg2;
- rtx label = NULL;
- rtx srcmem, dstmem;
- int desired_alignment = (TARGET_PENTIUMPRO
- && (count == 0 || count >= (unsigned int) 260)
- ? 8 : UNITS_PER_WORD);
- /* Get rid of MEM_OFFSETs, they won't be accurate. */
- dst = change_address (dst, BLKmode, destreg);
- src = change_address (src, BLKmode, srcreg);
-
-      /* In case we don't know anything about the alignment, default to
-	 the library version, since it is usually equally fast and results
-	 in shorter code.
-
-	 Also emit the call when we know that the count is large and the
-	 call overhead will not be important.  */
- if (!TARGET_INLINE_ALL_STRINGOPS
- && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
- return 0;
-
- if (TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld ());
-
- countreg2 = gen_reg_rtx (Pmode);
- countreg = copy_to_mode_reg (counter_mode, count_exp);
-
- /* We don't use loops to align destination and to copy parts smaller
- than 4 bytes, because gcc is able to optimize such code better (in
- the case the destination or the count really is aligned, gcc is often
- able to predict the branches) and also it is friendlier to the
- hardware branch prediction.
-
- Using loops is beneficial for generic case, because we can
- handle small counts using the loops. Many CPUs (such as Athlon)
- have large REP prefix setup costs.
-
- This is quite costly. Maybe we can revisit this decision later or
- add some customizability to this code. */
-
- if (count == 0 && align < desired_alignment)
- {
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
- LEU, 0, counter_mode, 1, label);
- }
- if (align <= 1)
- {
- rtx label = ix86_expand_aligntest (destreg, 1);
- srcmem = change_address (src, QImode, srcreg);
- dstmem = change_address (dst, QImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- ix86_adjust_counter (countreg, 1);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 2)
- {
- rtx label = ix86_expand_aligntest (destreg, 2);
- srcmem = change_address (src, HImode, srcreg);
- dstmem = change_address (dst, HImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- ix86_adjust_counter (countreg, 2);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 4 && desired_alignment > 4)
- {
- rtx label = ix86_expand_aligntest (destreg, 4);
- srcmem = change_address (src, SImode, srcreg);
- dstmem = change_address (dst, SImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- ix86_adjust_counter (countreg, 4);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-
- if (label && desired_alignment > 4 && !TARGET_64BIT)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- label = NULL_RTX;
- }
- if (!TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld ());
- if (TARGET_64BIT)
- {
- emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
- GEN_INT (3)));
- destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
- }
- else
- {
- emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
- destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
- }
- srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
- destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
- emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
- countreg2, destexp, srcexp));
-
- if (label)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
- {
- srcmem = change_address (src, SImode, srcreg);
- dstmem = change_address (dst, SImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- }
- if ((align <= 4 || count == 0) && TARGET_64BIT)
- {
- rtx label = ix86_expand_aligntest (countreg, 4);
- srcmem = change_address (src, SImode, srcreg);
- dstmem = change_address (dst, SImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 2 && count != 0 && (count & 2))
- {
- srcmem = change_address (src, HImode, srcreg);
- dstmem = change_address (dst, HImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- }
- if (align <= 2 || count == 0)
- {
- rtx label = ix86_expand_aligntest (countreg, 2);
- srcmem = change_address (src, HImode, srcreg);
- dstmem = change_address (dst, HImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 1 && count != 0 && (count & 1))
- {
- srcmem = change_address (src, QImode, srcreg);
- dstmem = change_address (dst, QImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- }
- if (align <= 1 || count == 0)
- {
- rtx label = ix86_expand_aligntest (countreg, 1);
- srcmem = change_address (src, QImode, srcreg);
- dstmem = change_address (dst, QImode, destreg);
- emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- }
-
- return 1;
-}
-
-/* Expand string clear operation (bzero). Use i386 string operations when
- profitable. expand_movmem contains similar code. */
-int
-ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
-{
- rtx destreg, zeroreg, countreg, destexp;
- enum machine_mode counter_mode;
- HOST_WIDE_INT align = 0;
- unsigned HOST_WIDE_INT count = 0;
-
- if (GET_CODE (align_exp) == CONST_INT)
- align = INTVAL (align_exp);
-
- /* Can't use any of this if the user has appropriated esi. */
- if (global_regs[4])
- return 0;
-
- /* This simple hack avoids all inlining code and simplifies code below. */
- if (!TARGET_ALIGN_STRINGOPS)
- align = 32;
-
- if (GET_CODE (count_exp) == CONST_INT)
- {
- count = INTVAL (count_exp);
- if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
- return 0;
- }
-  /* Figure out the proper mode for the counter.  For 32-bit targets it
-     is always SImode; for 64-bit targets use SImode when possible,
-     otherwise DImode.  Set count to the number of bytes to clear when
-     known at compile time.  */
- if (!TARGET_64BIT
- || GET_MODE (count_exp) == SImode
- || x86_64_zext_immediate_operand (count_exp, VOIDmode))
- counter_mode = SImode;
- else
- counter_mode = DImode;
-
- destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
- if (destreg != XEXP (dst, 0))
- dst = replace_equiv_address_nv (dst, destreg);
-
-  /* When optimizing for size emit a simple rep ; stosb instruction for
-     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
-     sequence is 7 bytes long, so if optimizing for size and the count is
-     small enough that some stosl, stosw and stosb instructions without
-     rep are shorter, fall back into the next if.  */
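-  /* For instance (illustrative only), when optimizing for size:
-     count == 6 gives (6 & 3) + (6 >> 2) == 2 + 1 == 3 <= 7, so the
-     inline stosl/stosw code below is used; count == 34 gives
-     2 + 8 == 10 > 7, so rep; stosb is emitted instead.  */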
-
- if ((!optimize || optimize_size)
- && (count == 0
- || ((count & 0x03)
- && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
- {
- emit_insn (gen_cld ());
-
- countreg = ix86_zero_extend_to_Pmode (count_exp);
- zeroreg = copy_to_mode_reg (QImode, const0_rtx);
- destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
- emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
- }
- else if (count != 0
- && (align >= 8
- || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
- || optimize_size || count < (unsigned int) 64))
- {
- int size = TARGET_64BIT && !optimize_size ? 8 : 4;
- unsigned HOST_WIDE_INT offset = 0;
-
- emit_insn (gen_cld ());
-
- zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
- if (count & ~(size - 1))
- {
- unsigned HOST_WIDE_INT repcount;
- unsigned int max_nonrep;
-
- repcount = count >> (size == 4 ? 2 : 3);
- if (!TARGET_64BIT)
- repcount &= 0x3fffffff;
-
- /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
- movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
- bytes. In both cases the latter seems to be faster for small
- values of N. */
- max_nonrep = size == 4 ? 7 : 4;
- if (!optimize_size)
- switch (ix86_tune)
- {
- case PROCESSOR_PENTIUM4:
- case PROCESSOR_NOCONA:
- max_nonrep = 3;
- break;
- default:
- break;
- }
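-	  /* E.g. (illustration): with size == 4, repcount == 5 and
-	     default tuning, 5 <= 7 so five discrete stosl insns are
-	     emitted below; with Pentium4/Nocona tuning the threshold
-	     drops to 3, so rep; stosl is used instead.  */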
-
- if (repcount <= max_nonrep)
- while (repcount-- > 0)
- {
- rtx mem = adjust_automodify_address_nv (dst,
- GET_MODE (zeroreg),
- destreg, offset);
- emit_insn (gen_strset (destreg, mem, zeroreg));
- offset += size;
- }
- else
- {
- countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
- countreg = ix86_zero_extend_to_Pmode (countreg);
- destexp = gen_rtx_ASHIFT (Pmode, countreg,
- GEN_INT (size == 4 ? 2 : 3));
- destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
- emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
- destexp));
- offset = count & ~(size - 1);
- }
- }
- if (size == 8 && (count & 0x04))
- {
- rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
- offset);
- emit_insn (gen_strset (destreg, mem,
- gen_rtx_SUBREG (SImode, zeroreg, 0)));
- offset += 4;
- }
- if (count & 0x02)
- {
- rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
- offset);
- emit_insn (gen_strset (destreg, mem,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- offset += 2;
- }
- if (count & 0x01)
- {
- rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
- offset);
- emit_insn (gen_strset (destreg, mem,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- }
- }
- else
- {
- rtx countreg2;
- rtx label = NULL;
- /* Compute desired alignment of the string operation. */
- int desired_alignment = (TARGET_PENTIUMPRO
- && (count == 0 || count >= (unsigned int) 260)
- ? 8 : UNITS_PER_WORD);
-
-      /* In case we don't know anything about the alignment, default to
-	 the library version, since it is usually equally fast and results
-	 in shorter code.
-
- Also emit call when we know that the count is large and call overhead
- will not be important. */
- if (!TARGET_INLINE_ALL_STRINGOPS
- && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
- return 0;
-
- if (TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld ());
-
- countreg2 = gen_reg_rtx (Pmode);
- countreg = copy_to_mode_reg (counter_mode, count_exp);
- zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
- /* Get rid of MEM_OFFSET, it won't be accurate. */
- dst = change_address (dst, BLKmode, destreg);
-
- if (count == 0 && align < desired_alignment)
- {
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
- LEU, 0, counter_mode, 1, label);
- }
- if (align <= 1)
- {
- rtx label = ix86_expand_aligntest (destreg, 1);
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- ix86_adjust_counter (countreg, 1);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 2)
- {
- rtx label = ix86_expand_aligntest (destreg, 2);
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- ix86_adjust_counter (countreg, 2);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align <= 4 && desired_alignment > 4)
- {
- rtx label = ix86_expand_aligntest (destreg, 4);
- emit_insn (gen_strset (destreg, dst,
- (TARGET_64BIT
- ? gen_rtx_SUBREG (SImode, zeroreg, 0)
- : zeroreg)));
- ix86_adjust_counter (countreg, 4);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-
- if (label && desired_alignment > 4 && !TARGET_64BIT)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- label = NULL_RTX;
- }
-
- if (!TARGET_SINGLE_STRINGOP)
- emit_insn (gen_cld ());
- if (TARGET_64BIT)
- {
- emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
- GEN_INT (3)));
- destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
- }
- else
- {
- emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
- destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
- }
- destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
- emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
-
- if (label)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-
- if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (SImode, zeroreg, 0)));
- if (TARGET_64BIT && (align <= 4 || count == 0))
- {
- rtx label = ix86_expand_aligntest (countreg, 4);
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (SImode, zeroreg, 0)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 2 && count != 0 && (count & 2))
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- if (align <= 2 || count == 0)
- {
- rtx label = ix86_expand_aligntest (countreg, 2);
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (HImode, zeroreg, 0)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (align > 1 && count != 0 && (count & 1))
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- if (align <= 1 || count == 0)
- {
- rtx label = ix86_expand_aligntest (countreg, 1);
- emit_insn (gen_strset (destreg, dst,
- gen_rtx_SUBREG (QImode, zeroreg, 0)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- }
- return 1;
-}
-
-/* Expand strlen. */
-int
-ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
-{
- rtx addr, scratch1, scratch2, scratch3, scratch4;
-
-  /* The generic case of the strlen expander is long.  Avoid its
-     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
-
- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
- && !TARGET_INLINE_ALL_STRINGOPS
- && !optimize_size
- && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
- return 0;
-
- addr = force_reg (Pmode, XEXP (src, 0));
- scratch1 = gen_reg_rtx (Pmode);
-
- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
- && !optimize_size)
- {
-      /* Well, it seems that some optimizer does not combine a call like
-	   foo (strlen (bar), strlen (bar));
-	 when the move and the subtraction are done here.  It does calculate
-	 the length just once when these instructions are done inside of
-	 output_strlen_unroll().  But since &bar[strlen(bar)] is often used
-	 and this way one fewer register is live for the lifetime of
-	 output_strlen_unroll(), this is better.  */
-
- emit_move_insn (out, addr);
-
- ix86_expand_strlensi_unroll_1 (out, src, align);
-
- /* strlensi_unroll_1 returns the address of the zero at the end of
- the string, like memchr(), so compute the length by subtracting
- the start address. */
- if (TARGET_64BIT)
- emit_insn (gen_subdi3 (out, out, addr));
- else
- emit_insn (gen_subsi3 (out, out, addr));
- }
- else
- {
- rtx unspec;
- scratch2 = gen_reg_rtx (Pmode);
- scratch3 = gen_reg_rtx (Pmode);
- scratch4 = force_reg (Pmode, constm1_rtx);
-
- emit_move_insn (scratch3, addr);
- eoschar = force_reg (QImode, eoschar);
-
- emit_insn (gen_cld ());
- src = replace_equiv_address_nv (src, scratch3);
-
- /* If .md starts supporting :P, this can be done in .md. */
- unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
- scratch4), UNSPEC_SCAS);
- emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
- if (TARGET_64BIT)
- {
- emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
- emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
- }
- else
- {
- emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
- emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
- }
- }
- return 1;
-}
-
-/* Expand the appropriate insns for doing strlen if not just doing
- repnz; scasb
-
- out = result, initialized with the start address
- align_rtx = alignment of the address.
-   scratch = scratch register, initialized with the start address when
- not aligned, otherwise undefined
-
- This is just the body. It needs the initializations mentioned above and
- some address computing at the end. These things are done in i386.md. */
-
-static void
-ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
-{
- int align;
- rtx tmp;
- rtx align_2_label = NULL_RTX;
- rtx align_3_label = NULL_RTX;
- rtx align_4_label = gen_label_rtx ();
- rtx end_0_label = gen_label_rtx ();
- rtx mem;
- rtx tmpreg = gen_reg_rtx (SImode);
- rtx scratch = gen_reg_rtx (SImode);
- rtx cmp;
-
- align = 0;
- if (GET_CODE (align_rtx) == CONST_INT)
- align = INTVAL (align_rtx);
-
- /* Loop to check 1..3 bytes for null to get an aligned pointer. */
-
- /* Is there a known alignment and is it less than 4? */
- if (align < 4)
- {
- rtx scratch1 = gen_reg_rtx (Pmode);
- emit_move_insn (scratch1, out);
- /* Is there a known alignment and is it not 2? */
- if (align != 2)
- {
- align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
- align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
-
- /* Leave just the 3 lower bits. */
- align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
- NULL_RTX, 0, OPTAB_WIDEN);
-
- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- Pmode, 1, align_4_label);
- emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
- Pmode, 1, align_2_label);
- emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
- Pmode, 1, align_3_label);
- }
- else
- {
-	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
-	     check whether it is aligned to a 4-byte boundary.  */
-
- align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
- NULL_RTX, 0, OPTAB_WIDEN);
-
- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- Pmode, 1, align_4_label);
- }
-
- mem = change_address (src, QImode, out);
-
- /* Now compare the bytes. */
-
-      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
- QImode, 1, end_0_label);
-
- /* Increment the address. */
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
-
-      /* Not needed with an alignment of 2.  */
- if (align != 2)
- {
- emit_label (align_2_label);
-
- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
- end_0_label);
-
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
-
- emit_label (align_3_label);
- }
-
- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
- end_0_label);
-
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
- }
-
-  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
-     to align this loop; doing so only enlarges the code and does not
-     help speed.  */
- emit_label (align_4_label);
-
- mem = change_address (src, SImode, out);
- emit_move_insn (scratch, mem);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
- else
- emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
-
-  /* This formula yields a nonzero result iff one of the bytes is zero.
-     This saves three branches inside the loop and many cycles.  */
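-  /* Worked example (illustrative): scratch == 0x41420043 contains a
-     zero byte, and indeed (0x41420043 - 0x01010101) & ~0x41420043
-     & 0x80808080 == 0x4040ff42 & 0xbebdffbc & 0x80808080
-     == 0x00008000 != 0; for scratch == 0x61626364, which has no zero
-     byte, the same computation yields 0.  */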
-
- emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
- emit_insn (gen_one_cmplsi2 (scratch, scratch));
- emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
- emit_insn (gen_andsi3 (tmpreg, tmpreg,
- gen_int_mode (0x80808080, SImode)));
- emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
- align_4_label);
-
- if (TARGET_CMOVE)
- {
- rtx reg = gen_reg_rtx (SImode);
- rtx reg2 = gen_reg_rtx (Pmode);
- emit_move_insn (reg, tmpreg);
- emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
-
- /* If zero is not in the first two bytes, move two bytes forward. */
- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
- gen_rtx_IF_THEN_ELSE (SImode, tmp,
- reg,
- tmpreg)));
- /* Emit lea manually to avoid clobbering of flags. */
- emit_insn (gen_rtx_SET (SImode, reg2,
- gen_rtx_PLUS (Pmode, out, const2_rtx)));
-
- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, out,
- gen_rtx_IF_THEN_ELSE (Pmode, tmp,
- reg2,
- out)));
-
- }
- else
- {
- rtx end_2_label = gen_label_rtx ();
- /* Is zero in the first two bytes? */
-
- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, end_2_label),
- pc_rtx);
- tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
- JUMP_LABEL (tmp) = end_2_label;
-
- /* Not in the first two. Move two bytes forward. */
- emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const2_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const2_rtx));
-
- emit_label (end_2_label);
-
- }
-
- /* Avoid branch in fixing the byte. */
- tmpreg = gen_lowpart (QImode, tmpreg);
- emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
- cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
- if (TARGET_64BIT)
- emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
- else
- emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
-
- emit_label (end_0_label);
-}
-
-void
-ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
- rtx callarg2 ATTRIBUTE_UNUSED,
- rtx pop, int sibcall)
-{
- rtx use = NULL, call;
-
- if (pop == const0_rtx)
- pop = NULL;
- gcc_assert (!TARGET_64BIT || !pop);
-
- if (TARGET_MACHO && !TARGET_64BIT)
- {
-#if TARGET_MACHO
- if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
- fnaddr = machopic_indirect_call_target (fnaddr);
-#endif
- }
- else
- {
- /* Static functions and indirect calls don't need the pic register. */
- if (! TARGET_64BIT && flag_pic
- && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
- && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
- use_reg (&use, pic_offset_table_rtx);
- }
-
- if (TARGET_64BIT && INTVAL (callarg2) >= 0)
- {
- rtx al = gen_rtx_REG (QImode, 0);
- emit_move_insn (al, callarg2);
- use_reg (&use, al);
- }
-
- if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
- {
- fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
- fnaddr = gen_rtx_MEM (QImode, fnaddr);
- }
- if (sibcall && TARGET_64BIT
- && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
- {
- rtx addr;
- addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
- fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
- emit_move_insn (fnaddr, addr);
- fnaddr = gen_rtx_MEM (QImode, fnaddr);
- }
-
- call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
- if (retval)
- call = gen_rtx_SET (VOIDmode, retval, call);
- if (pop)
- {
- pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
- pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
- }
-
- call = emit_call_insn (call);
- if (use)
- CALL_INSN_FUNCTION_USAGE (call) = use;
-}
-
-
-/* Clear stack slot assignments remembered from previous functions.
- This is called from INIT_EXPANDERS once before RTL is emitted for each
- function. */
-
-static struct machine_function *
-ix86_init_machine_status (void)
-{
- struct machine_function *f;
-
- f = ggc_alloc_cleared (sizeof (struct machine_function));
- f->use_fast_prologue_epilogue_nregs = -1;
- f->tls_descriptor_call_expanded_p = 0;
-
- return f;
-}
-
-/* Return a MEM corresponding to a stack slot with mode MODE.
- Allocate a new slot if necessary.
-
- The RTL for a function can have several slots available: N is
- which slot to use. */
-
-rtx
-assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
-{
- struct stack_local_entry *s;
-
- gcc_assert (n < MAX_386_STACK_LOCALS);
-
- /* Virtual slot is valid only before vregs are instantiated. */
- gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
-
- for (s = ix86_stack_locals; s; s = s->next)
- if (s->mode == mode && s->n == n)
- return s->rtl;
-
- s = (struct stack_local_entry *)
- ggc_alloc (sizeof (struct stack_local_entry));
- s->n = n;
- s->mode = mode;
- s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
-
- s->next = ix86_stack_locals;
- ix86_stack_locals = s;
- return s->rtl;
-}
-
-/* Construct the SYMBOL_REF for the tls_get_addr function. */
-
-static GTY(()) rtx ix86_tls_symbol;
-rtx
-ix86_tls_get_addr (void)
-{
- if (!ix86_tls_symbol)
- {
- ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
- (TARGET_ANY_GNU_TLS
- && !TARGET_64BIT)
- ? "___tls_get_addr"
- : "__tls_get_addr");
- }
-
- return ix86_tls_symbol;
-}
-
-/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
-
-static GTY(()) rtx ix86_tls_module_base_symbol;
-rtx
-ix86_tls_module_base (void)
-{
- if (!ix86_tls_module_base_symbol)
- {
- ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
- "_TLS_MODULE_BASE_");
- SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
- }
-
- return ix86_tls_module_base_symbol;
-}
-
-/* Calculate the length of the memory address in the instruction
- encoding. Does not include the one-byte modrm, opcode, or prefix. */
-
-int
-memory_address_length (rtx addr)
-{
- struct ix86_address parts;
- rtx base, index, disp;
- int len;
- int ok;
-
- if (GET_CODE (addr) == PRE_DEC
- || GET_CODE (addr) == POST_INC
- || GET_CODE (addr) == PRE_MODIFY
- || GET_CODE (addr) == POST_MODIFY)
- return 0;
-
- ok = ix86_decompose_address (addr, &parts);
- gcc_assert (ok);
-
- if (parts.base && GET_CODE (parts.base) == SUBREG)
- parts.base = SUBREG_REG (parts.base);
- if (parts.index && GET_CODE (parts.index) == SUBREG)
- parts.index = SUBREG_REG (parts.index);
-
- base = parts.base;
- index = parts.index;
- disp = parts.disp;
- len = 0;
-
- /* Rule of thumb:
- - esp as the base always wants an index,
- - ebp as the base always wants a displacement. */
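-  /* Concretely (encoding illustration): a modrm byte with rm == 100
-     selects a SIB byte, so (%esp) cannot use the one-byte
-     register-indirect form; and modrm 0x05 means disp32 with no base,
-     so (%ebp) must be encoded as modrm 0x45 with an explicit zero
-     displacement byte.  */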
-
- /* Register Indirect. */
- if (base && !index && !disp)
- {
- /* esp (for its index) and ebp (for its displacement) need
- the two-byte modrm form. */
- if (addr == stack_pointer_rtx
- || addr == arg_pointer_rtx
- || addr == frame_pointer_rtx
- || addr == hard_frame_pointer_rtx)
- len = 1;
- }
-
- /* Direct Addressing. */
- else if (disp && !base && !index)
- len = 4;
-
- else
- {
- /* Find the length of the displacement constant. */
- if (disp)
- {
- if (base && satisfies_constraint_K (disp))
- len = 1;
- else
- len = 4;
- }
- /* ebp always wants a displacement. */
- else if (base == hard_frame_pointer_rtx)
- len = 1;
-
- /* An index requires the two-byte modrm form.... */
- if (index
- /* ...like esp, which always wants an index. */
- || base == stack_pointer_rtx
- || base == arg_pointer_rtx
- || base == frame_pointer_rtx)
- len += 1;
- }
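-  /* E.g. (illustration): 4(%ebp) has a disp8, so len == 1; 4(%esp)
-     additionally needs the SIB byte, so len == 2.  Neither figure
-     includes the modrm byte itself.  */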
-
- return len;
-}
-
-/* Compute the default value for the "length_immediate" attribute.  When
-   SHORTFORM is set, expect that the insn has an 8-bit immediate
-   alternative.  */
-int
-ix86_attr_length_immediate_default (rtx insn, int shortform)
-{
- int len = 0;
- int i;
- extract_insn_cached (insn);
- for (i = recog_data.n_operands - 1; i >= 0; --i)
- if (CONSTANT_P (recog_data.operand[i]))
- {
- gcc_assert (!len);
- if (shortform && satisfies_constraint_K (recog_data.operand[i]))
- len = 1;
- else
- {
- switch (get_attr_mode (insn))
- {
-	  case MODE_QI:
-	    len += 1;
-	    break;
-	  case MODE_HI:
-	    len += 2;
-	    break;
-	  case MODE_SI:
-	    len += 4;
-	    break;
-	  /* Immediates for DImode instructions are encoded as
-	     32-bit sign-extended values.  */
-	  case MODE_DI:
-	    len += 4;
-	    break;
- default:
- fatal_insn ("unknown insn mode", insn);
- }
- }
- }
- return len;
-}
-/* Compute the default value for the "length_address" attribute.  */
-int
-ix86_attr_length_address_default (rtx insn)
-{
- int i;
-
- if (get_attr_type (insn) == TYPE_LEA)
- {
- rtx set = PATTERN (insn);
-
- if (GET_CODE (set) == PARALLEL)
- set = XVECEXP (set, 0, 0);
-
- gcc_assert (GET_CODE (set) == SET);
-
- return memory_address_length (SET_SRC (set));
- }
-
- extract_insn_cached (insn);
- for (i = recog_data.n_operands - 1; i >= 0; --i)
-    if (GET_CODE (recog_data.operand[i]) == MEM)
-      return memory_address_length (XEXP (recog_data.operand[i], 0));
- return 0;
-}
-
-/* Return the maximum number of instructions a CPU can issue.  */
-
-static int
-ix86_issue_rate (void)
-{
- switch (ix86_tune)
- {
- case PROCESSOR_PENTIUM:
- case PROCESSOR_K6:
- return 2;
-
- case PROCESSOR_PENTIUMPRO:
- case PROCESSOR_PENTIUM4:
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- case PROCESSOR_NOCONA:
- case PROCESSOR_GENERIC32:
- case PROCESSOR_GENERIC64:
- return 3;
- /* APPLE LOCAL begin mainline */
- case PROCESSOR_CORE2:
- return 4;
- /* APPLE LOCAL end mainline */
-
- default:
- return 1;
- }
-}
-
-/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
-   flags set by DEP_INSN and no other register set by DEP_INSN.  */
-
-static int
-ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
-{
- rtx set, set2;
-
- /* Simplify the test for uninteresting insns. */
- if (insn_type != TYPE_SETCC
- && insn_type != TYPE_ICMOV
- && insn_type != TYPE_FCMOV
- && insn_type != TYPE_IBR)
- return 0;
-
- if ((set = single_set (dep_insn)) != 0)
- {
- set = SET_DEST (set);
- set2 = NULL_RTX;
- }
- else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
- && XVECLEN (PATTERN (dep_insn), 0) == 2
- && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
- && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
- {
- set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
-      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
- }
- else
- return 0;
-
- if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
- return 0;
-
- /* This test is true if the dependent insn reads the flags but
- not any other potentially set register. */
- if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
- return 0;
-
- if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
- return 0;
-
- return 1;
-}
-
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
- address with operands set by DEP_INSN. */
-
-static int
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
-{
- rtx addr;
-
- if (insn_type == TYPE_LEA
- && TARGET_PENTIUM)
- {
- addr = PATTERN (insn);
-
- if (GET_CODE (addr) == PARALLEL)
- addr = XVECEXP (addr, 0, 0);
-
- gcc_assert (GET_CODE (addr) == SET);
-
- addr = SET_SRC (addr);
- }
- else
- {
- int i;
- extract_insn_cached (insn);
- for (i = recog_data.n_operands - 1; i >= 0; --i)
- if (GET_CODE (recog_data.operand[i]) == MEM)
- {
- addr = XEXP (recog_data.operand[i], 0);
- goto found;
- }
- return 0;
- found:;
- }
-
- return modified_in_p (addr, dep_insn);
-}
-
-static int
-ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
-{
- enum attr_type insn_type, dep_insn_type;
- enum attr_memory memory;
- rtx set, set2;
- int dep_insn_code_number;
-
- /* Anti and output dependencies have zero cost on all CPUs. */
- if (REG_NOTE_KIND (link) != 0)
- return 0;
-
- dep_insn_code_number = recog_memoized (dep_insn);
-
- /* If we can't recognize the insns, we can't really do anything. */
- if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
- return cost;
-
- insn_type = get_attr_type (insn);
- dep_insn_type = get_attr_type (dep_insn);
-
- switch (ix86_tune)
- {
- case PROCESSOR_PENTIUM:
- /* Address Generation Interlock adds a cycle of latency. */
- if (ix86_agi_dependent (insn, dep_insn, insn_type))
- cost += 1;
-
- /* ??? Compares pair with jump/setcc. */
- if (ix86_flags_dependent (insn, dep_insn, insn_type))
- cost = 0;
-
- /* Floating point stores require value to be ready one cycle earlier. */
- if (insn_type == TYPE_FMOV
- && get_attr_memory (insn) == MEMORY_STORE
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
- cost += 1;
- break;
-
- case PROCESSOR_PENTIUMPRO:
- memory = get_attr_memory (insn);
-
- /* INT->FP conversion is expensive. */
- if (get_attr_fp_int_src (dep_insn))
- cost += 5;
-
- /* There is one cycle extra latency between an FP op and a store. */
- if (insn_type == TYPE_FMOV
- && (set = single_set (dep_insn)) != NULL_RTX
- && (set2 = single_set (insn)) != NULL_RTX
- && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
- && GET_CODE (SET_DEST (set2)) == MEM)
- cost += 1;
-
-      /* Show the ability of the reorder buffer to hide the latency of a
-	 load by executing it in parallel with the previous instruction,
-	 when the previous instruction is not needed to compute the
-	 address.  */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
- {
-	  /* Claim moves to take one cycle, as the core can issue one load
-	     at a time and the next load can start a cycle later.  */
- if (dep_insn_type == TYPE_IMOV
- || dep_insn_type == TYPE_FMOV)
- cost = 1;
- else if (cost > 1)
- cost--;
- }
- break;
-
- case PROCESSOR_K6:
- memory = get_attr_memory (insn);
-
- /* The esp dependency is resolved before the instruction is really
- finished. */
- if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
- && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
- return 1;
-
- /* INT->FP conversion is expensive. */
- if (get_attr_fp_int_src (dep_insn))
- cost += 5;
-
-      /* Show the ability of the reorder buffer to hide the latency of a
-	 load by executing it in parallel with the previous instruction,
-	 when the previous instruction is not needed to compute the
-	 address.  */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
- {
-	  /* Claim moves to take one cycle, as the core can issue one load
-	     at a time and the next load can start a cycle later.  */
- if (dep_insn_type == TYPE_IMOV
- || dep_insn_type == TYPE_FMOV)
- cost = 1;
- else if (cost > 2)
- cost -= 2;
- else
- cost = 1;
- }
- break;
-
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- case PROCESSOR_GENERIC32:
- case PROCESSOR_GENERIC64:
- memory = get_attr_memory (insn);
-
-      /* Show the ability of the reorder buffer to hide the latency of a
-	 load by executing it in parallel with the previous instruction,
-	 when the previous instruction is not needed to compute the
-	 address.  */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
- {
- enum attr_unit unit = get_attr_unit (insn);
- int loadcost = 3;
-
-	  /* Because of the difference between the lengths of the integer
-	     and floating unit pipeline preparation stages, the memory
-	     operands for floating point are cheaper.
-
-	     ??? For Athlon the difference is most probably 2.  */
- if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
- loadcost = 3;
- else
- loadcost = TARGET_ATHLON ? 2 : 0;
-
- if (cost >= loadcost)
- cost -= loadcost;
- else
- cost = 0;
- }
-      break;
-
- default:
- break;
- }
-
- return cost;
-}
-
-/* How many alternative schedules to try.  This should be as wide as the
-   scheduling freedom in the DFA, but no wider.  Making this value too
-   large results in extra work for the scheduler.  */
-
-static int
-ia32_multipass_dfa_lookahead (void)
-{
- if (ix86_tune == PROCESSOR_PENTIUM)
- return 2;
-
- if (ix86_tune == PROCESSOR_PENTIUMPRO
- || ix86_tune == PROCESSOR_K6)
- return 1;
-
- else
- return 0;
-}
-
-
-/* Compute the alignment given to a constant that is being placed in memory.
- EXP is the constant and ALIGN is the alignment that the object would
- ordinarily have.
- The value of this function is used instead of that alignment to align
- the object. */
-
-int
-ix86_constant_alignment (tree exp, int align)
-{
- if (TREE_CODE (exp) == REAL_CST)
- {
- if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
- return 64;
- else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
- return 128;
- }
- else if (!optimize_size && TREE_CODE (exp) == STRING_CST
- && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
- return BITS_PER_WORD;
-
-/* APPLE LOCAL begin 4090661 */
-#if TARGET_MACHO
-  /* Without this, static arrays initialized to strings get aligned
-     to 32 bytes.  These go in cstring, which would result in a lot of
-     extra padding in files with a couple of small strings.  4090661.  */
- else if (TREE_CODE (exp) == STRING_CST)
- {
- if (TREE_STRING_LENGTH (exp) >= 31 && !optimize_size)
- return BITS_PER_WORD;
- else
- return 8;
- }
-#endif
-/* APPLE LOCAL end 4090661 */
- return align;
-}
-
-/* Compute the alignment for a static variable.
- TYPE is the data type, and ALIGN is the alignment that
- the object would ordinarily have. The value of this function is used
- instead of that alignment to align the object. */
-
-int
-ix86_data_alignment (tree type, int align)
-{
- int max_align = optimize_size ? BITS_PER_WORD : 256;
-
- if (AGGREGATE_TYPE_P (type)
- && TYPE_SIZE (type)
- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
- || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
- && align < max_align)
- align = max_align;
-
-  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
-     to a 16-byte boundary.  */
- if (TARGET_64BIT)
- {
- if (AGGREGATE_TYPE_P (type)
- && TYPE_SIZE (type)
- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
- || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
- return 128;
- }
-
- if (TREE_CODE (type) == ARRAY_TYPE)
- {
- if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
- return 64;
- if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
- return 128;
- }
- else if (TREE_CODE (type) == COMPLEX_TYPE)
- {
- if (TYPE_MODE (type) == DCmode && align < 64)
- return 64;
- if (TYPE_MODE (type) == XCmode && align < 128)
- return 128;
- }
- else if ((TREE_CODE (type) == RECORD_TYPE
- || TREE_CODE (type) == UNION_TYPE
- || TREE_CODE (type) == QUAL_UNION_TYPE)
- && TYPE_FIELDS (type))
- {
- if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
- return 64;
- if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
- return 128;
- }
- else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
- || TREE_CODE (type) == INTEGER_TYPE)
- {
- if (TYPE_MODE (type) == DFmode && align < 64)
- return 64;
- if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
- return 128;
- }
-
- return align;
-}
-
-/* Compute the alignment for a local variable.
- TYPE is the data type, and ALIGN is the alignment that
- the object would ordinarily have. The value of this macro is used
- instead of that alignment to align the object. */
-
-int
-ix86_local_alignment (tree type, int align)
-{
-  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
-     to a 16-byte boundary.  */
- if (TARGET_64BIT)
- {
- if (AGGREGATE_TYPE_P (type)
- && TYPE_SIZE (type)
- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
- || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
- return 128;
- }
- if (TREE_CODE (type) == ARRAY_TYPE)
- {
- if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
- return 64;
- if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
- return 128;
- }
- else if (TREE_CODE (type) == COMPLEX_TYPE)
- {
- if (TYPE_MODE (type) == DCmode && align < 64)
- return 64;
- if (TYPE_MODE (type) == XCmode && align < 128)
- return 128;
- }
- else if ((TREE_CODE (type) == RECORD_TYPE
- || TREE_CODE (type) == UNION_TYPE
- || TREE_CODE (type) == QUAL_UNION_TYPE)
- && TYPE_FIELDS (type))
- {
- if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
- return 64;
- if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
- return 128;
- }
- else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
- || TREE_CODE (type) == INTEGER_TYPE)
- {
- if (TYPE_MODE (type) == DFmode && align < 64)
- return 64;
- if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
- return 128;
- }
- return align;
-}
-
-/* Emit RTL insns to initialize the variable parts of a trampoline.
- FNADDR is an RTX for the address of the function's pure code.
- CXT is an RTX for the static chain value for the function. */
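-/* Byte for byte, the 32-bit trampoline emitted below is (illustration):
-       b9 <cxt:4>	movl  $CXT, %ecx
-       e9 <disp:4>	jmp   FNADDR	(disp relative to the end of the jmp)
-   The 64-bit variant loads FNADDR into %r11 (REX-prefixed 0xbb mov, or
-   movabs) and CXT into %r10 (0x49 0xba movabs), ending with an indirect
-   jmp *%r11 (0x49 0xff 0xe3).  */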
-void
-x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
-{
- if (!TARGET_64BIT)
- {
- /* Compute offset from the end of the jmp to the target function. */
- rtx disp = expand_binop (SImode, sub_optab, fnaddr,
- plus_constant (tramp, 10),
- NULL_RTX, 1, OPTAB_DIRECT);
- emit_move_insn (gen_rtx_MEM (QImode, tramp),
- gen_int_mode (0xb9, QImode));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
- emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
- gen_int_mode (0xe9, QImode));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
- }
- else
- {
- int offset = 0;
-      /* Try to load the address using the shorter movl instead of movabs.
-	 We may want to support movq for kernel mode, but the kernel does
-	 not use trampolines at the moment.  */
- if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
- {
- fnaddr = copy_to_mode_reg (DImode, fnaddr);
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
- gen_int_mode (0xbb41, HImode));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
- gen_lowpart (SImode, fnaddr));
- offset += 6;
- }
- else
- {
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
- gen_int_mode (0xbb49, HImode));
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
- fnaddr);
- offset += 10;
- }
- /* Load static chain using movabs to r10. */
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
- gen_int_mode (0xba49, HImode));
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
- cxt);
- offset += 10;
-      /* Jump to %r11.  */
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
- gen_int_mode (0xff49, HImode));
- emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
- gen_int_mode (0xe3, QImode));
- offset += 3;
- gcc_assert (offset <= TRAMPOLINE_SIZE);
- }
-
-#ifdef ENABLE_EXECUTE_STACK
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
- LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
-#endif
-}
-
-/* Codes for all the SSE/MMX builtins. */
-enum ix86_builtins
-{
- IX86_BUILTIN_ADDPS,
- IX86_BUILTIN_ADDSS,
- IX86_BUILTIN_DIVPS,
- IX86_BUILTIN_DIVSS,
- IX86_BUILTIN_MULPS,
- IX86_BUILTIN_MULSS,
- IX86_BUILTIN_SUBPS,
- IX86_BUILTIN_SUBSS,
-
- IX86_BUILTIN_CMPEQPS,
- IX86_BUILTIN_CMPLTPS,
- IX86_BUILTIN_CMPLEPS,
- IX86_BUILTIN_CMPGTPS,
- IX86_BUILTIN_CMPGEPS,
- IX86_BUILTIN_CMPNEQPS,
- IX86_BUILTIN_CMPNLTPS,
- IX86_BUILTIN_CMPNLEPS,
- IX86_BUILTIN_CMPNGTPS,
- IX86_BUILTIN_CMPNGEPS,
- IX86_BUILTIN_CMPORDPS,
- IX86_BUILTIN_CMPUNORDPS,
- IX86_BUILTIN_CMPEQSS,
- IX86_BUILTIN_CMPLTSS,
- IX86_BUILTIN_CMPLESS,
- IX86_BUILTIN_CMPNEQSS,
- IX86_BUILTIN_CMPNLTSS,
- IX86_BUILTIN_CMPNLESS,
- IX86_BUILTIN_CMPNGTSS,
- IX86_BUILTIN_CMPNGESS,
- IX86_BUILTIN_CMPORDSS,
- IX86_BUILTIN_CMPUNORDSS,
-
- IX86_BUILTIN_COMIEQSS,
- IX86_BUILTIN_COMILTSS,
- IX86_BUILTIN_COMILESS,
- IX86_BUILTIN_COMIGTSS,
- IX86_BUILTIN_COMIGESS,
- IX86_BUILTIN_COMINEQSS,
- IX86_BUILTIN_UCOMIEQSS,
- IX86_BUILTIN_UCOMILTSS,
- IX86_BUILTIN_UCOMILESS,
- IX86_BUILTIN_UCOMIGTSS,
- IX86_BUILTIN_UCOMIGESS,
- IX86_BUILTIN_UCOMINEQSS,
-
- IX86_BUILTIN_CVTPI2PS,
- IX86_BUILTIN_CVTPS2PI,
- IX86_BUILTIN_CVTSI2SS,
- IX86_BUILTIN_CVTSI642SS,
- IX86_BUILTIN_CVTSS2SI,
- IX86_BUILTIN_CVTSS2SI64,
- IX86_BUILTIN_CVTTPS2PI,
- IX86_BUILTIN_CVTTSS2SI,
- IX86_BUILTIN_CVTTSS2SI64,
-
- IX86_BUILTIN_MAXPS,
- IX86_BUILTIN_MAXSS,
- IX86_BUILTIN_MINPS,
- IX86_BUILTIN_MINSS,
-
- IX86_BUILTIN_LOADUPS,
- IX86_BUILTIN_STOREUPS,
- IX86_BUILTIN_MOVSS,
-
- IX86_BUILTIN_MOVHLPS,
- IX86_BUILTIN_MOVLHPS,
- IX86_BUILTIN_LOADHPS,
- IX86_BUILTIN_LOADLPS,
- IX86_BUILTIN_STOREHPS,
- IX86_BUILTIN_STORELPS,
-
- IX86_BUILTIN_MASKMOVQ,
- IX86_BUILTIN_MOVMSKPS,
- IX86_BUILTIN_PMOVMSKB,
-
- IX86_BUILTIN_MOVNTPS,
- IX86_BUILTIN_MOVNTQ,
-
- IX86_BUILTIN_LOADDQU,
- IX86_BUILTIN_STOREDQU,
-
- IX86_BUILTIN_PACKSSWB,
- IX86_BUILTIN_PACKSSDW,
- IX86_BUILTIN_PACKUSWB,
-
- IX86_BUILTIN_PADDB,
- IX86_BUILTIN_PADDW,
- IX86_BUILTIN_PADDD,
- IX86_BUILTIN_PADDQ,
- IX86_BUILTIN_PADDSB,
- IX86_BUILTIN_PADDSW,
- IX86_BUILTIN_PADDUSB,
- IX86_BUILTIN_PADDUSW,
- IX86_BUILTIN_PSUBB,
- IX86_BUILTIN_PSUBW,
- IX86_BUILTIN_PSUBD,
- IX86_BUILTIN_PSUBQ,
- IX86_BUILTIN_PSUBSB,
- IX86_BUILTIN_PSUBSW,
- IX86_BUILTIN_PSUBUSB,
- IX86_BUILTIN_PSUBUSW,
-
- IX86_BUILTIN_PAND,
- IX86_BUILTIN_PANDN,
- IX86_BUILTIN_POR,
- IX86_BUILTIN_PXOR,
-
- IX86_BUILTIN_PAVGB,
- IX86_BUILTIN_PAVGW,
-
- IX86_BUILTIN_PCMPEQB,
- IX86_BUILTIN_PCMPEQW,
- IX86_BUILTIN_PCMPEQD,
- IX86_BUILTIN_PCMPGTB,
- IX86_BUILTIN_PCMPGTW,
- IX86_BUILTIN_PCMPGTD,
-
- IX86_BUILTIN_PMADDWD,
-
- IX86_BUILTIN_PMAXSW,
- IX86_BUILTIN_PMAXUB,
- IX86_BUILTIN_PMINSW,
- IX86_BUILTIN_PMINUB,
-
- IX86_BUILTIN_PMULHUW,
- IX86_BUILTIN_PMULHW,
- IX86_BUILTIN_PMULLW,
-
- IX86_BUILTIN_PSADBW,
- IX86_BUILTIN_PSHUFW,
-
- IX86_BUILTIN_PSLLW,
- IX86_BUILTIN_PSLLD,
- IX86_BUILTIN_PSLLQ,
- IX86_BUILTIN_PSRAW,
- IX86_BUILTIN_PSRAD,
- IX86_BUILTIN_PSRLW,
- IX86_BUILTIN_PSRLD,
- IX86_BUILTIN_PSRLQ,
- IX86_BUILTIN_PSLLWI,
- IX86_BUILTIN_PSLLDI,
- IX86_BUILTIN_PSLLQI,
- IX86_BUILTIN_PSRAWI,
- IX86_BUILTIN_PSRADI,
- IX86_BUILTIN_PSRLWI,
- IX86_BUILTIN_PSRLDI,
- IX86_BUILTIN_PSRLQI,
-
- IX86_BUILTIN_PUNPCKHBW,
- IX86_BUILTIN_PUNPCKHWD,
- IX86_BUILTIN_PUNPCKHDQ,
- IX86_BUILTIN_PUNPCKLBW,
- IX86_BUILTIN_PUNPCKLWD,
- IX86_BUILTIN_PUNPCKLDQ,
-
- IX86_BUILTIN_SHUFPS,
-
- IX86_BUILTIN_RCPPS,
- IX86_BUILTIN_RCPSS,
- IX86_BUILTIN_RSQRTPS,
- IX86_BUILTIN_RSQRTSS,
- IX86_BUILTIN_SQRTPS,
- IX86_BUILTIN_SQRTSS,
-
- IX86_BUILTIN_UNPCKHPS,
- IX86_BUILTIN_UNPCKLPS,
-
- IX86_BUILTIN_ANDPS,
- IX86_BUILTIN_ANDNPS,
- IX86_BUILTIN_ORPS,
- IX86_BUILTIN_XORPS,
-
- IX86_BUILTIN_EMMS,
- IX86_BUILTIN_LDMXCSR,
- IX86_BUILTIN_STMXCSR,
- IX86_BUILTIN_SFENCE,
-
- /* 3DNow! Original */
- IX86_BUILTIN_FEMMS,
- IX86_BUILTIN_PAVGUSB,
- IX86_BUILTIN_PF2ID,
- IX86_BUILTIN_PFACC,
- IX86_BUILTIN_PFADD,
- IX86_BUILTIN_PFCMPEQ,
- IX86_BUILTIN_PFCMPGE,
- IX86_BUILTIN_PFCMPGT,
- IX86_BUILTIN_PFMAX,
- IX86_BUILTIN_PFMIN,
- IX86_BUILTIN_PFMUL,
- IX86_BUILTIN_PFRCP,
- IX86_BUILTIN_PFRCPIT1,
- IX86_BUILTIN_PFRCPIT2,
- IX86_BUILTIN_PFRSQIT1,
- IX86_BUILTIN_PFRSQRT,
- IX86_BUILTIN_PFSUB,
- IX86_BUILTIN_PFSUBR,
- IX86_BUILTIN_PI2FD,
- IX86_BUILTIN_PMULHRW,
-
- /* 3DNow! Athlon Extensions */
- IX86_BUILTIN_PF2IW,
- IX86_BUILTIN_PFNACC,
- IX86_BUILTIN_PFPNACC,
- IX86_BUILTIN_PI2FW,
- IX86_BUILTIN_PSWAPDSI,
- IX86_BUILTIN_PSWAPDSF,
-
- /* SSE2 */
- IX86_BUILTIN_ADDPD,
- IX86_BUILTIN_ADDSD,
- IX86_BUILTIN_DIVPD,
- IX86_BUILTIN_DIVSD,
- IX86_BUILTIN_MULPD,
- IX86_BUILTIN_MULSD,
- IX86_BUILTIN_SUBPD,
- IX86_BUILTIN_SUBSD,
-
- IX86_BUILTIN_CMPEQPD,
- IX86_BUILTIN_CMPLTPD,
- IX86_BUILTIN_CMPLEPD,
- IX86_BUILTIN_CMPGTPD,
- IX86_BUILTIN_CMPGEPD,
- IX86_BUILTIN_CMPNEQPD,
- IX86_BUILTIN_CMPNLTPD,
- IX86_BUILTIN_CMPNLEPD,
- IX86_BUILTIN_CMPNGTPD,
- IX86_BUILTIN_CMPNGEPD,
- IX86_BUILTIN_CMPORDPD,
- IX86_BUILTIN_CMPUNORDPD,
- IX86_BUILTIN_CMPNEPD,
- IX86_BUILTIN_CMPEQSD,
- IX86_BUILTIN_CMPLTSD,
- IX86_BUILTIN_CMPLESD,
- IX86_BUILTIN_CMPNEQSD,
- IX86_BUILTIN_CMPNLTSD,
- IX86_BUILTIN_CMPNLESD,
- IX86_BUILTIN_CMPORDSD,
- IX86_BUILTIN_CMPUNORDSD,
- IX86_BUILTIN_CMPNESD,
-
- IX86_BUILTIN_COMIEQSD,
- IX86_BUILTIN_COMILTSD,
- IX86_BUILTIN_COMILESD,
- IX86_BUILTIN_COMIGTSD,
- IX86_BUILTIN_COMIGESD,
- IX86_BUILTIN_COMINEQSD,
- IX86_BUILTIN_UCOMIEQSD,
- IX86_BUILTIN_UCOMILTSD,
- IX86_BUILTIN_UCOMILESD,
- IX86_BUILTIN_UCOMIGTSD,
- IX86_BUILTIN_UCOMIGESD,
- IX86_BUILTIN_UCOMINEQSD,
-
- IX86_BUILTIN_MAXPD,
- IX86_BUILTIN_MAXSD,
- IX86_BUILTIN_MINPD,
- IX86_BUILTIN_MINSD,
-
- IX86_BUILTIN_ANDPD,
- IX86_BUILTIN_ANDNPD,
- IX86_BUILTIN_ORPD,
- IX86_BUILTIN_XORPD,
-
- IX86_BUILTIN_SQRTPD,
- IX86_BUILTIN_SQRTSD,
-
- IX86_BUILTIN_UNPCKHPD,
- IX86_BUILTIN_UNPCKLPD,
-
- IX86_BUILTIN_SHUFPD,
-
- IX86_BUILTIN_LOADUPD,
- IX86_BUILTIN_STOREUPD,
- IX86_BUILTIN_MOVSD,
-
- IX86_BUILTIN_LOADHPD,
- IX86_BUILTIN_LOADLPD,
-
- IX86_BUILTIN_CVTDQ2PD,
- IX86_BUILTIN_CVTDQ2PS,
-
- IX86_BUILTIN_CVTPD2DQ,
- IX86_BUILTIN_CVTPD2PI,
- IX86_BUILTIN_CVTPD2PS,
- IX86_BUILTIN_CVTTPD2DQ,
- IX86_BUILTIN_CVTTPD2PI,
-
- IX86_BUILTIN_CVTPI2PD,
- IX86_BUILTIN_CVTSI2SD,
- IX86_BUILTIN_CVTSI642SD,
-
- IX86_BUILTIN_CVTSD2SI,
- IX86_BUILTIN_CVTSD2SI64,
- IX86_BUILTIN_CVTSD2SS,
- IX86_BUILTIN_CVTSS2SD,
- IX86_BUILTIN_CVTTSD2SI,
- IX86_BUILTIN_CVTTSD2SI64,
-
- IX86_BUILTIN_CVTPS2DQ,
- IX86_BUILTIN_CVTPS2PD,
- IX86_BUILTIN_CVTTPS2DQ,
-
- IX86_BUILTIN_MOVNTI,
- IX86_BUILTIN_MOVNTPD,
- IX86_BUILTIN_MOVNTDQ,
-
- /* SSE2 MMX */
- IX86_BUILTIN_MASKMOVDQU,
- IX86_BUILTIN_MOVMSKPD,
- IX86_BUILTIN_PMOVMSKB128,
-
- /* APPLE LOCAL begin 4099020 */
- IX86_BUILTIN_MOVQ,
- IX86_BUILTIN_LOADQ,
- IX86_BUILTIN_STOREQ,
- /* APPLE LOCAL end 4099020 */
-
- IX86_BUILTIN_PACKSSWB128,
- IX86_BUILTIN_PACKSSDW128,
- IX86_BUILTIN_PACKUSWB128,
-
- IX86_BUILTIN_PADDB128,
- IX86_BUILTIN_PADDW128,
- IX86_BUILTIN_PADDD128,
- IX86_BUILTIN_PADDQ128,
- IX86_BUILTIN_PADDSB128,
- IX86_BUILTIN_PADDSW128,
- IX86_BUILTIN_PADDUSB128,
- IX86_BUILTIN_PADDUSW128,
- IX86_BUILTIN_PSUBB128,
- IX86_BUILTIN_PSUBW128,
- IX86_BUILTIN_PSUBD128,
- IX86_BUILTIN_PSUBQ128,
- IX86_BUILTIN_PSUBSB128,
- IX86_BUILTIN_PSUBSW128,
- IX86_BUILTIN_PSUBUSB128,
- IX86_BUILTIN_PSUBUSW128,
-
- IX86_BUILTIN_PAND128,
- IX86_BUILTIN_PANDN128,
- IX86_BUILTIN_POR128,
- IX86_BUILTIN_PXOR128,
-
- IX86_BUILTIN_PAVGB128,
- IX86_BUILTIN_PAVGW128,
-
- IX86_BUILTIN_PCMPEQB128,
- IX86_BUILTIN_PCMPEQW128,
- IX86_BUILTIN_PCMPEQD128,
- IX86_BUILTIN_PCMPGTB128,
- IX86_BUILTIN_PCMPGTW128,
- IX86_BUILTIN_PCMPGTD128,
-
- IX86_BUILTIN_PMADDWD128,
-
- IX86_BUILTIN_PMAXSW128,
- IX86_BUILTIN_PMAXUB128,
- IX86_BUILTIN_PMINSW128,
- IX86_BUILTIN_PMINUB128,
-
- IX86_BUILTIN_PMULUDQ,
- IX86_BUILTIN_PMULUDQ128,
- IX86_BUILTIN_PMULHUW128,
- IX86_BUILTIN_PMULHW128,
- IX86_BUILTIN_PMULLW128,
-
- IX86_BUILTIN_PSADBW128,
- IX86_BUILTIN_PSHUFHW,
- IX86_BUILTIN_PSHUFLW,
- IX86_BUILTIN_PSHUFD,
-
- IX86_BUILTIN_PSLLW128,
- IX86_BUILTIN_PSLLD128,
- IX86_BUILTIN_PSLLQ128,
- IX86_BUILTIN_PSRAW128,
- IX86_BUILTIN_PSRAD128,
- IX86_BUILTIN_PSRLW128,
- IX86_BUILTIN_PSRLD128,
- IX86_BUILTIN_PSRLQ128,
- IX86_BUILTIN_PSLLDQI128,
- /* APPLE LOCAL 591583 */
- IX86_BUILTIN_PSLLDQI128_BYTESHIFT,
- IX86_BUILTIN_PSLLWI128,
- IX86_BUILTIN_PSLLDI128,
- IX86_BUILTIN_PSLLQI128,
- IX86_BUILTIN_PSRAWI128,
- IX86_BUILTIN_PSRADI128,
- IX86_BUILTIN_PSRLDQI128,
- /* APPLE LOCAL 591583 */
- IX86_BUILTIN_PSRLDQI128_BYTESHIFT,
- IX86_BUILTIN_PSRLWI128,
- IX86_BUILTIN_PSRLDI128,
- IX86_BUILTIN_PSRLQI128,
-
- IX86_BUILTIN_PUNPCKHBW128,
- IX86_BUILTIN_PUNPCKHWD128,
- IX86_BUILTIN_PUNPCKHDQ128,
- IX86_BUILTIN_PUNPCKHQDQ128,
- IX86_BUILTIN_PUNPCKLBW128,
- IX86_BUILTIN_PUNPCKLWD128,
- IX86_BUILTIN_PUNPCKLDQ128,
- IX86_BUILTIN_PUNPCKLQDQ128,
-
- IX86_BUILTIN_CLFLUSH,
- IX86_BUILTIN_MFENCE,
- IX86_BUILTIN_LFENCE,
-
- /* Prescott New Instructions. */
- IX86_BUILTIN_ADDSUBPS,
- IX86_BUILTIN_HADDPS,
- IX86_BUILTIN_HSUBPS,
- IX86_BUILTIN_MOVSHDUP,
- IX86_BUILTIN_MOVSLDUP,
- IX86_BUILTIN_ADDSUBPD,
- IX86_BUILTIN_HADDPD,
- IX86_BUILTIN_HSUBPD,
- IX86_BUILTIN_LDDQU,
-
- IX86_BUILTIN_MONITOR,
- IX86_BUILTIN_MWAIT,
- /* APPLE LOCAL begin mainline */
- /* Merom New Instructions. */
- IX86_BUILTIN_PHADDW,
- IX86_BUILTIN_PHADDD,
- IX86_BUILTIN_PHADDSW,
- IX86_BUILTIN_PHSUBW,
- IX86_BUILTIN_PHSUBD,
- IX86_BUILTIN_PHSUBSW,
- IX86_BUILTIN_PMADDUBSW,
- IX86_BUILTIN_PMULHRSW,
- IX86_BUILTIN_PSHUFB,
- IX86_BUILTIN_PSIGNB,
- IX86_BUILTIN_PSIGNW,
- IX86_BUILTIN_PSIGND,
- IX86_BUILTIN_PALIGNR,
- IX86_BUILTIN_PABSB,
- IX86_BUILTIN_PABSW,
- IX86_BUILTIN_PABSD,
-
- IX86_BUILTIN_PHADDW128,
- IX86_BUILTIN_PHADDD128,
- IX86_BUILTIN_PHADDSW128,
- IX86_BUILTIN_PHSUBW128,
- IX86_BUILTIN_PHSUBD128,
- IX86_BUILTIN_PHSUBSW128,
- IX86_BUILTIN_PMADDUBSW128,
- IX86_BUILTIN_PMULHRSW128,
- IX86_BUILTIN_PSHUFB128,
- IX86_BUILTIN_PSIGNB128,
- IX86_BUILTIN_PSIGNW128,
- IX86_BUILTIN_PSIGND128,
- IX86_BUILTIN_PALIGNR128,
- IX86_BUILTIN_PABSB128,
- IX86_BUILTIN_PABSW128,
- IX86_BUILTIN_PABSD128,
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* AMDFAM10 - SSE4A New Instructions. */
- IX86_BUILTIN_MOVNTSD,
- IX86_BUILTIN_MOVNTSS,
- IX86_BUILTIN_EXTRQI,
- IX86_BUILTIN_EXTRQ,
- IX86_BUILTIN_INSERTQI,
- IX86_BUILTIN_INSERTQ,
-
- /* SSE4.1. */
- IX86_BUILTIN_BLENDPD,
- IX86_BUILTIN_BLENDPS,
- IX86_BUILTIN_BLENDVPD,
- IX86_BUILTIN_BLENDVPS,
- IX86_BUILTIN_PBLENDVB128,
- IX86_BUILTIN_PBLENDW128,
-
- IX86_BUILTIN_DPPD,
- IX86_BUILTIN_DPPS,
-
- IX86_BUILTIN_INSERTPS128,
-
- IX86_BUILTIN_MOVNTDQA,
- IX86_BUILTIN_MPSADBW128,
- IX86_BUILTIN_PACKUSDW128,
- IX86_BUILTIN_PCMPEQQ,
- IX86_BUILTIN_PHMINPOSUW128,
-
- IX86_BUILTIN_PMAXSB128,
- IX86_BUILTIN_PMAXSD128,
- IX86_BUILTIN_PMAXUD128,
- IX86_BUILTIN_PMAXUW128,
-
- IX86_BUILTIN_PMINSB128,
- IX86_BUILTIN_PMINSD128,
- IX86_BUILTIN_PMINUD128,
- IX86_BUILTIN_PMINUW128,
-
- IX86_BUILTIN_PMOVSXBW128,
- IX86_BUILTIN_PMOVSXBD128,
- IX86_BUILTIN_PMOVSXBQ128,
- IX86_BUILTIN_PMOVSXWD128,
- IX86_BUILTIN_PMOVSXWQ128,
- IX86_BUILTIN_PMOVSXDQ128,
-
- IX86_BUILTIN_PMOVZXBW128,
- IX86_BUILTIN_PMOVZXBD128,
- IX86_BUILTIN_PMOVZXBQ128,
- IX86_BUILTIN_PMOVZXWD128,
- IX86_BUILTIN_PMOVZXWQ128,
- IX86_BUILTIN_PMOVZXDQ128,
-
- IX86_BUILTIN_PMULDQ128,
- IX86_BUILTIN_PMULLD128,
-
- IX86_BUILTIN_ROUNDPD,
- IX86_BUILTIN_ROUNDPS,
- IX86_BUILTIN_ROUNDSD,
- IX86_BUILTIN_ROUNDSS,
-
- IX86_BUILTIN_PTESTZ,
- IX86_BUILTIN_PTESTC,
- IX86_BUILTIN_PTESTNZC,
- /* APPLE LOCAL end 5612787 mainline sse4 */
- /* APPLE LOCAL end mainline */
- IX86_BUILTIN_VEC_INIT_V2SI,
- IX86_BUILTIN_VEC_INIT_V4HI,
- IX86_BUILTIN_VEC_INIT_V8QI,
- IX86_BUILTIN_VEC_EXT_V2DF,
- IX86_BUILTIN_VEC_EXT_V2DI,
- IX86_BUILTIN_VEC_EXT_V4SF,
- IX86_BUILTIN_VEC_EXT_V4SI,
- IX86_BUILTIN_VEC_EXT_V8HI,
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* deletion */
- /* APPLE LOCAL end 5612787 mainline sse4 */
- IX86_BUILTIN_VEC_EXT_V2SI,
- IX86_BUILTIN_VEC_EXT_V4HI,
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- IX86_BUILTIN_VEC_EXT_V16QI,
- IX86_BUILTIN_VEC_SET_V2DI,
- IX86_BUILTIN_VEC_SET_V4SF,
- IX86_BUILTIN_VEC_SET_V4SI,
- /* APPLE LOCAL end 5612787 mainline sse4 */
- IX86_BUILTIN_VEC_SET_V8HI,
- IX86_BUILTIN_VEC_SET_V4HI,
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- IX86_BUILTIN_VEC_SET_V16QI,
-
- IX86_BUILTIN_VEC_PACK_SFIX,
-
- /* SSE4.2. */
- IX86_BUILTIN_CRC32QI,
- IX86_BUILTIN_CRC32HI,
- IX86_BUILTIN_CRC32SI,
- IX86_BUILTIN_CRC32DI,
-
- IX86_BUILTIN_PCMPESTRI128,
- IX86_BUILTIN_PCMPESTRM128,
- IX86_BUILTIN_PCMPESTRA128,
- IX86_BUILTIN_PCMPESTRC128,
- IX86_BUILTIN_PCMPESTRO128,
- IX86_BUILTIN_PCMPESTRS128,
- IX86_BUILTIN_PCMPESTRZ128,
- IX86_BUILTIN_PCMPISTRI128,
- IX86_BUILTIN_PCMPISTRM128,
- IX86_BUILTIN_PCMPISTRA128,
- IX86_BUILTIN_PCMPISTRC128,
- IX86_BUILTIN_PCMPISTRO128,
- IX86_BUILTIN_PCMPISTRS128,
- IX86_BUILTIN_PCMPISTRZ128,
-
- IX86_BUILTIN_PCMPGTQ,
-
- /* TFmode support builtins. */
- IX86_BUILTIN_INFQ,
- IX86_BUILTIN_FABSQ,
- IX86_BUILTIN_COPYSIGNQ,
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- IX86_BUILTIN_MAX
-};
-
-#define def_builtin(MASK, NAME, TYPE, CODE) \
-do { \
- if ((MASK) & target_flags \
- && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
- lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
- NULL, NULL_TREE); \
-} while (0)
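-
-/* A typical use, as found in the builtin-initialization code later in
-   this file (a sketch; v4sf_ftype_v4sf_v4sf names a function type tree
-   built there):
-
-     def_builtin (MASK_SSE, "__builtin_ia32_addps",
-		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
-
-   The builtin is registered only when the mask bits are present in
-   target_flags (and, for MASK_64BIT entries, only on 64-bit).  */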
-
-/* APPLE LOCAL begin 5612787 mainline sse4 */
-/* Like def_builtin, but also marks the function decl "const". */
-
-static inline tree
-def_builtin_const (int mask, const char *name, tree type,
- enum ix86_builtins code)
-{
- tree decl = NULL_TREE;
- if ((mask) & target_flags
- && (!((mask) & MASK_64BIT) || TARGET_64BIT))
- decl = lang_hooks.builtin_function (name, type, code, BUILT_IN_MD,
- NULL, NULL_TREE);
-
- if (decl)
- TREE_READONLY (decl) = 1;
- return decl;
-}
-/* APPLE LOCAL end 5612787 mainline sse4 */
-
-/* Bits for builtin_description.flag. */
-
-/* Set when we don't support the comparison natively, and should
- swap_comparison in order to support it. */
-#define BUILTIN_DESC_SWAP_OPERANDS 1
-
-struct builtin_description
-{
- const unsigned int mask;
- const enum insn_code icode;
- const char *const name;
- const enum ix86_builtins code;
- const enum rtx_code comparison;
- const unsigned int flag;
-};
-
-/* APPLE LOCAL begin 4299257 */
-static const struct builtin_description bdesc_comi[] =
-{
- { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
- { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
- { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
- { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
- { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
- { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
- { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
-};
-static const struct builtin_description bdesc_ucomi[] =
-{
- { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
- { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
- { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
- { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
- { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
- { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
-};
-/* APPLE LOCAL end 4299257 */
-
-/* APPLE LOCAL begin 5612787 mainline sse4 */
-static const struct builtin_description bdesc_ptest[] =
-{
- /* SSE4.1 */
- { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
-};
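-
-/* The comparison field selects which flag the ptest expander reads:
-   EQ -> ZF set (ptestz), LTU -> CF set (ptestc), GTU -> neither ZF
-   nor CF set (ptestnzc).  */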
-
-static const struct builtin_description bdesc_pcmpestr[] =
-{
- /* SSE4.2 */
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
-};
-
-static const struct builtin_description bdesc_pcmpistr[] =
-{
- /* SSE4.2 */
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
- { MASK_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
-};
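-
-/* For the flag-extracting pcmp[ei]str variants (...a/c/o/s/z), the
-   flag field carries the CC mode, cast to int, from which the expander
-   reads the corresponding EFLAGS bit; the plain i/m variants leave it
-   zero.  */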
-
-static const struct builtin_description bdesc_crc32[] =
-{
- /* SSE4.2 */
- { MASK_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
- { MASK_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
- { MASK_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
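- /* crc32di consumes 64-bit operands, so it is available only in
-    64-bit mode.  */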
- { MASK_SSE4_2 | MASK_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
-};
-
-/* SSE builtins with 3 arguments, where the last argument must be an
-   immediate or xmm0.  */
-static const struct builtin_description bdesc_sse_3arg[] =
-{
- /* SSE4.1 */
- { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
-};
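-
-/* Entries whose name field is 0 (roundsd/roundss above) are skipped by
-   the generic registration loops (d->name == 0) and are presumably
-   registered individually elsewhere.  */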
-/* APPLE LOCAL end 5612787 mainline sse4 */
-
-static const struct builtin_description bdesc_2arg[] =
-{
- /* SSE */
- { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
- { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
- { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
- { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
-
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
-
- { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
- { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
-
- { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
- { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
- { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
-
- { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
-
- /* MMX */
- { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- { MASK_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
- { MASK_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
-
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
-
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
-
- /* Special. */
- { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
-
- { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
- { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashlv4hi2si, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashlv2si2si, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashlv1di3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashlv1di2si, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_lshrv4hi2si, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_lshrv2si2si, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_lshrv1di3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_lshrv1di2si, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
-
- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashrv4hi2si, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_ashrv2si2si, 0, IX86_BUILTIN_PSRADI, 0, 0 },
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
-
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
- { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
-
- /* SSE2 */
- { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
- BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
-
- { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
-
- /* SSE2 MMX */
- { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
- /* APPLE LOCAL 5612787 mainline sse4 */
- { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
- { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
- { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
-
- /* APPLE LOCAL 5612787 mainline sse4 */
- { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
- { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
- { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
- { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
- { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
- { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
- { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
-
- /* SSE3 MMX */
- { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
- /* APPLE LOCAL begin mainline */
- { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
-
- /* SSSE3 MMX */
- { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
- /* APPLE LOCAL 5612787 mainline sse4 */
- { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
- /* APPLE LOCAL end mainline */
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* SSE4.1 */
- { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
-
- /* SSE4.2 */
- { MASK_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
- /* APPLE LOCAL end 5612787 mainline sse4 */
-};
-
-static const struct builtin_description bdesc_1arg[] =
-{
- { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
-
- { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
-
- { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
- { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
- { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
- { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
- { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
-
- { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
-
- /* SSE3 */
- { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
- { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
- /* APPLE LOCAL begin mainline */
-
- /* SSSE3 */
- { MASK_SSSE3, CODE_FOR_ssse3_pabsv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pabsv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
- { MASK_SSSE3, CODE_FOR_ssse3_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
- /* APPLE LOCAL 5612787 mainline sse4 */
- { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
- /* APPLE LOCAL end mainline */
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* SSE4.1 */
- { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
-
- /* Builtins treated as taking 1 argument here, though they also take
-    a constant smaller than 8 bits as the 2nd argument.  */
- { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
- { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
- /* APPLE LOCAL end 5612787 mainline sse4 */
-};
-
-static void
-ix86_init_builtins (void)
-{
- if (TARGET_MMX)
- ix86_init_mmx_sse_builtins ();
-
- /* APPLE LOCAL begin constant cfstrings */
-#ifdef SUBTARGET_INIT_BUILTINS
- SUBTARGET_INIT_BUILTINS;
-#endif
- /* APPLE LOCAL end constant cfstrings */
-}
-
-/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
-   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
-   builtins are defined.  */
-static void
-ix86_init_mmx_sse_builtins (void)
-{
- const struct builtin_description * d;
- size_t i;
-
- tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
- tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
- tree V2DI_type_node
- = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
- tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
- tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
- tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
- tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
- tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
- /* APPLE LOCAL 4656532 use V1DImode for _m64 */
- tree V1DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
-
- tree pchar_type_node = build_pointer_type (char_type_node);
- tree pcchar_type_node = build_pointer_type (
- build_type_variant (char_type_node, 1, 0));
- tree pfloat_type_node = build_pointer_type (float_type_node);
- tree pcfloat_type_node = build_pointer_type (
- build_type_variant (float_type_node, 1, 0));
- tree pv2si_type_node = build_pointer_type (V2SI_type_node);
- tree pv2di_type_node = build_pointer_type (V2DI_type_node);
- /* APPLE LOCAL 4656532 use V1DImode for _m64 */
- tree pv1di_type_node = build_pointer_type (V1DI_type_node);
-
- /* Comparisons. */
- tree int_ftype_v4sf_v4sf
- = build_function_type_list (integer_type_node,
- V4SF_type_node, V4SF_type_node, NULL_TREE);
- tree v4si_ftype_v4sf_v4sf
- = build_function_type_list (V4SI_type_node,
- V4SF_type_node, V4SF_type_node, NULL_TREE);
- /* MMX/SSE/integer conversions. */
- tree int_ftype_v4sf
- = build_function_type_list (integer_type_node,
- V4SF_type_node, NULL_TREE);
- tree int64_ftype_v4sf
- = build_function_type_list (long_long_integer_type_node,
- V4SF_type_node, NULL_TREE);
- tree int_ftype_v8qi
- = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
- tree v4sf_ftype_v4sf_int
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, integer_type_node, NULL_TREE);
- tree v4sf_ftype_v4sf_int64
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree v4sf_ftype_v4sf_v2si
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V2SI_type_node, NULL_TREE);
-
- /* Miscellaneous. */
- tree v8qi_ftype_v4hi_v4hi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v2si_v2si
- = build_function_type_list (V4HI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree v4sf_ftype_v4sf_v4sf_int
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V4SF_type_node,
- integer_type_node, NULL_TREE);
- tree v2si_ftype_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- tree v4hi_ftype_v4hi_v1di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, V1DI_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_v1di
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V1DI_type_node, NULL_TREE);
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
-
- tree void_ftype_void
- = build_function_type (void_type_node, void_list_node);
- tree void_ftype_unsigned
- = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
- tree void_ftype_unsigned_unsigned
- = build_function_type_list (void_type_node, unsigned_type_node,
- unsigned_type_node, NULL_TREE);
- tree void_ftype_pcvoid_unsigned_unsigned
- = build_function_type_list (void_type_node, const_ptr_type_node,
- unsigned_type_node, unsigned_type_node,
- NULL_TREE);
- tree unsigned_ftype_void
- = build_function_type (unsigned_type_node, void_list_node);
- tree v2si_ftype_v4sf
- = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
- /* Loads/stores. */
- tree void_ftype_v8qi_v8qi_pchar
- = build_function_type_list (void_type_node,
- V8QI_type_node, V8QI_type_node,
- pchar_type_node, NULL_TREE);
- tree v4sf_ftype_pcfloat
- = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
- /* @@@ the type is bogus */
- tree v4sf_ftype_v4sf_pv2si
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, pv2si_type_node, NULL_TREE);
- tree void_ftype_pv2si_v4sf
- = build_function_type_list (void_type_node,
- pv2si_type_node, V4SF_type_node, NULL_TREE);
- tree void_ftype_pfloat_v4sf
- = build_function_type_list (void_type_node,
- pfloat_type_node, V4SF_type_node, NULL_TREE);
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- tree void_ftype_pv1di_v1di
- = build_function_type_list (void_type_node,
- pv1di_type_node, V1DI_type_node, NULL_TREE);
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
- tree void_ftype_pv2di_v2di
- = build_function_type_list (void_type_node,
- pv2di_type_node, V2DI_type_node, NULL_TREE);
- /* Normal vector unops. */
- tree v4sf_ftype_v4sf
- = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
- /* APPLE LOCAL begin mainline */
- tree v16qi_ftype_v16qi
- = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
- tree v8hi_ftype_v8hi
- = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
- tree v4si_ftype_v4si
- = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi
- = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi
- = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
- /* APPLE LOCAL end mainline */
-
- /* Normal vector binops. */
- tree v4sf_ftype_v4sf_v4sf
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V4SF_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- tree v1di_ftype_v1di_v1di
- = build_function_type_list (V1DI_type_node,
- V1DI_type_node, V1DI_type_node, NULL_TREE);
- /* APPLE LOCAL begin 4684674 */
- tree v1di_ftype_v1di_int
- = build_function_type_list (V1DI_type_node,
- V1DI_type_node, integer_type_node, NULL_TREE);
- /* APPLE LOCAL end 4684674 */
- /* APPLE LOCAL begin 4656532 */
- tree v1di_ftype_v1di_v1di_int
- = build_function_type_list (V1DI_type_node,
- V1DI_type_node,
- V1DI_type_node,
- integer_type_node, NULL_TREE);
- /* APPLE LOCAL end 4656532 */
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
-
- tree v2si_ftype_v2sf
- = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
- tree v2sf_ftype_v2si
- = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
- tree v2si_ftype_v2si
- = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree v2sf_ftype_v2sf
- = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
- tree v2sf_ftype_v2sf_v2sf
- = build_function_type_list (V2SF_type_node,
- V2SF_type_node, V2SF_type_node, NULL_TREE);
- tree v2si_ftype_v2sf_v2sf
- = build_function_type_list (V2SI_type_node,
- V2SF_type_node, V2SF_type_node, NULL_TREE);
- tree pint_type_node = build_pointer_type (integer_type_node);
- tree pdouble_type_node = build_pointer_type (double_type_node);
- tree pcdouble_type_node = build_pointer_type (
- build_type_variant (double_type_node, 1, 0));
- tree int_ftype_v2df_v2df
- = build_function_type_list (integer_type_node,
- V2DF_type_node, V2DF_type_node, NULL_TREE);
-
- tree void_ftype_pcvoid
- = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
- tree v4sf_ftype_v4si
- = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
- tree v4si_ftype_v4sf
- = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
- tree v2df_ftype_v4si
- = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
- tree v4si_ftype_v2df
- = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
- tree v2si_ftype_v2df
- = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
- tree v4sf_ftype_v2df
- = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
- tree v2df_ftype_v2si
- = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
- tree v2df_ftype_v4sf
- = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
- tree int_ftype_v2df
- = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
- tree int64_ftype_v2df
- = build_function_type_list (long_long_integer_type_node,
- V2DF_type_node, NULL_TREE);
- tree v2df_ftype_v2df_int
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, integer_type_node, NULL_TREE);
- tree v2df_ftype_v2df_int64
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree v4sf_ftype_v4sf_v2df
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V2DF_type_node, NULL_TREE);
- tree v2df_ftype_v2df_v4sf
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, V4SF_type_node, NULL_TREE);
- tree v2df_ftype_v2df_v2df_int
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, V2DF_type_node,
- integer_type_node,
- NULL_TREE);
- tree v2df_ftype_v2df_pcdouble
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, pcdouble_type_node, NULL_TREE);
- tree void_ftype_pdouble_v2df
- = build_function_type_list (void_type_node,
- pdouble_type_node, V2DF_type_node, NULL_TREE);
- tree void_ftype_pint_int
- = build_function_type_list (void_type_node,
- pint_type_node, integer_type_node, NULL_TREE);
- tree void_ftype_v16qi_v16qi_pchar
- = build_function_type_list (void_type_node,
- V16QI_type_node, V16QI_type_node,
- pchar_type_node, NULL_TREE);
- tree v2df_ftype_pcdouble
- = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
- tree v2df_ftype_v2df_v2df
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, V2DF_type_node, NULL_TREE);
- tree v16qi_ftype_v16qi_v16qi
- = build_function_type_list (V16QI_type_node,
- V16QI_type_node, V16QI_type_node, NULL_TREE);
- tree v8hi_ftype_v8hi_v8hi
- = build_function_type_list (V8HI_type_node,
- V8HI_type_node, V8HI_type_node, NULL_TREE);
- tree v4si_ftype_v4si_v4si
- = build_function_type_list (V4SI_type_node,
- V4SI_type_node, V4SI_type_node, NULL_TREE);
- tree v2di_ftype_v2di_v2di
- = build_function_type_list (V2DI_type_node,
- V2DI_type_node, V2DI_type_node, NULL_TREE);
- tree v2di_ftype_v2df_v2df
- = build_function_type_list (V2DI_type_node,
- V2DF_type_node, V2DF_type_node, NULL_TREE);
- tree v2df_ftype_v2df
- = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
- tree v2di_ftype_v2di_int
- = build_function_type_list (V2DI_type_node,
- V2DI_type_node, integer_type_node, NULL_TREE);
- /* APPLE LOCAL begin mainline */
- tree v2di_ftype_v2di_v2di_int
- = build_function_type_list (V2DI_type_node, V2DI_type_node,
- V2DI_type_node, integer_type_node, NULL_TREE);
- /* APPLE LOCAL end mainline */
- tree v4si_ftype_v4si_int
- = build_function_type_list (V4SI_type_node,
- V4SI_type_node, integer_type_node, NULL_TREE);
- tree v8hi_ftype_v8hi_int
- = build_function_type_list (V8HI_type_node,
- V8HI_type_node, integer_type_node, NULL_TREE);
- tree v4si_ftype_v8hi_v8hi
- = build_function_type_list (V4SI_type_node,
- V8HI_type_node, V8HI_type_node, NULL_TREE);
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- tree v1di_ftype_v8qi_v8qi
- = build_function_type_list (V1DI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v1di_ftype_v2si_v2si
- = build_function_type_list (V1DI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- tree v2di_ftype_v16qi_v16qi
- = build_function_type_list (V2DI_type_node,
- V16QI_type_node, V16QI_type_node, NULL_TREE);
- tree v2di_ftype_v4si_v4si
- = build_function_type_list (V2DI_type_node,
- V4SI_type_node, V4SI_type_node, NULL_TREE);
- tree int_ftype_v16qi
- = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
- tree v16qi_ftype_pcchar
- = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
- tree void_ftype_pchar_v16qi
- = build_function_type_list (void_type_node,
- pchar_type_node, V16QI_type_node, NULL_TREE);
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- tree v2di_ftype_v2di_unsigned_unsigned
- = build_function_type_list (V2DI_type_node, V2DI_type_node,
- unsigned_type_node, unsigned_type_node,
- NULL_TREE);
- tree v2di_ftype_v2di_v2di_unsigned_unsigned
- = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
- unsigned_type_node, unsigned_type_node,
- NULL_TREE);
- tree v2di_ftype_v2di_v16qi
- = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v2df_ftype_v2df_v2df_v2df
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, V2DF_type_node,
- V2DF_type_node, NULL_TREE);
- tree v4sf_ftype_v4sf_v4sf_v4sf
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V4SF_type_node,
- V4SF_type_node, NULL_TREE);
- tree v8hi_ftype_v16qi
- = build_function_type_list (V8HI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v4si_ftype_v16qi
- = build_function_type_list (V4SI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v2di_ftype_v16qi
- = build_function_type_list (V2DI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v4si_ftype_v8hi
- = build_function_type_list (V4SI_type_node, V8HI_type_node,
- NULL_TREE);
- tree v2di_ftype_v8hi
- = build_function_type_list (V2DI_type_node, V8HI_type_node,
- NULL_TREE);
- tree v2di_ftype_v4si
- = build_function_type_list (V2DI_type_node, V4SI_type_node,
- NULL_TREE);
- tree v2di_ftype_pv2di
- = build_function_type_list (V2DI_type_node, pv2di_type_node,
- NULL_TREE);
- tree v16qi_ftype_v16qi_v16qi_int
- = build_function_type_list (V16QI_type_node, V16QI_type_node,
- V16QI_type_node, integer_type_node,
- NULL_TREE);
- tree v16qi_ftype_v16qi_v16qi_v16qi
- = build_function_type_list (V16QI_type_node, V16QI_type_node,
- V16QI_type_node, V16QI_type_node,
- NULL_TREE);
- tree v8hi_ftype_v8hi_v8hi_int
- = build_function_type_list (V8HI_type_node, V8HI_type_node,
- V8HI_type_node, integer_type_node,
- NULL_TREE);
- tree v4si_ftype_v4si_v4si_int
- = build_function_type_list (V4SI_type_node, V4SI_type_node,
- V4SI_type_node, integer_type_node,
- NULL_TREE);
- tree int_ftype_v2di_v2di
- = build_function_type_list (integer_type_node,
- V2DI_type_node, V2DI_type_node,
- NULL_TREE);
- tree int_ftype_v16qi_int_v16qi_int_int
- = build_function_type_list (integer_type_node,
- V16QI_type_node,
- integer_type_node,
- V16QI_type_node,
- integer_type_node,
- integer_type_node,
- NULL_TREE);
- tree v16qi_ftype_v16qi_int_v16qi_int_int
- = build_function_type_list (V16QI_type_node,
- V16QI_type_node,
- integer_type_node,
- V16QI_type_node,
- integer_type_node,
- integer_type_node,
- NULL_TREE);
- tree int_ftype_v16qi_v16qi_int
- = build_function_type_list (integer_type_node,
- V16QI_type_node,
- V16QI_type_node,
- integer_type_node,
- NULL_TREE);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- tree float80_type;
- tree float128_type;
- tree ftype;
-
- /* The __float80 type. */
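- /* On most x86 configurations, long double is already the 80-bit
-    extended type (XFmode), so the first branch applies and __float80
-    is just an alias of long double; the else branch builds a fresh
-    80-bit REAL_TYPE for targets where long double has a different
-    format.  */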
- if (TYPE_MODE (long_double_type_node) == XFmode)
- (*lang_hooks.types.register_builtin_type) (long_double_type_node,
- "__float80");
- else
- {
- /* The __float80 type. */
- float80_type = make_node (REAL_TYPE);
- TYPE_PRECISION (float80_type) = 80;
- layout_type (float80_type);
- (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
- }
-
- if (TARGET_64BIT)
- {
- float128_type = make_node (REAL_TYPE);
- TYPE_PRECISION (float128_type) = 128;
- layout_type (float128_type);
- (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
- }
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* Add all SSE builtins that are more or less simple operations on
- three operands. */
- for (i = 0, d = bdesc_sse_3arg;
- i < ARRAY_SIZE (bdesc_sse_3arg);
- i++, d++)
- {
- /* Use one of the operands; the target can have a different mode for
- mask-generating compares. */
- enum machine_mode mode;
- tree type;
-
- if (d->name == 0)
- continue;
- mode = insn_data[d->icode].operand[1].mode;
-
- switch (mode)
- {
- case V16QImode:
- type = v16qi_ftype_v16qi_v16qi_int;
- break;
- case V8HImode:
- type = v8hi_ftype_v8hi_v8hi_int;
- break;
- case V4SImode:
- type = v4si_ftype_v4si_v4si_int;
- break;
- case V2DImode:
- type = v2di_ftype_v2di_v2di_int;
- break;
- case V2DFmode:
- type = v2df_ftype_v2df_v2df_int;
- break;
- case V4SFmode:
- type = v4sf_ftype_v4sf_v4sf_int;
- break;
- default:
- gcc_unreachable ();
- }
-
- /* Override for variable blends. */
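- /* blendvpd/blendvps/pblendvb take a full vector in xmm0 as the
-    third operand instead of an 8-bit immediate, so their types use
-    three vector arguments.  */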
- switch (d->icode)
- {
- case CODE_FOR_sse4_1_blendvpd:
- type = v2df_ftype_v2df_v2df_v2df;
- break;
- case CODE_FOR_sse4_1_blendvps:
- type = v4sf_ftype_v4sf_v4sf_v4sf;
- break;
- case CODE_FOR_sse4_1_pblendvb:
- type = v16qi_ftype_v16qi_v16qi_v16qi;
- break;
- default:
- break;
- }
-
- def_builtin (d->mask, d->name, type, d->code);
- }
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- /* Add all builtins that are more or less simple operations on two
- operands. */
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- {
- /* Use one of the operands; the target can have a different mode for
- mask-generating compares. */
- enum machine_mode mode;
- tree type;
-
- if (d->name == 0)
- continue;
- mode = insn_data[d->icode].operand[1].mode;
-
- switch (mode)
- {
- case V16QImode:
- type = v16qi_ftype_v16qi_v16qi;
- break;
- case V8HImode:
- type = v8hi_ftype_v8hi_v8hi;
- break;
- case V4SImode:
- type = v4si_ftype_v4si_v4si;
- break;
- case V2DImode:
- type = v2di_ftype_v2di_v2di;
- break;
- case V2DFmode:
- type = v2df_ftype_v2df_v2df;
- break;
- case V4SFmode:
- type = v4sf_ftype_v4sf_v4sf;
- break;
- case V8QImode:
- type = v8qi_ftype_v8qi_v8qi;
- break;
- case V4HImode:
- type = v4hi_ftype_v4hi_v4hi;
- break;
- case V2SImode:
- type = v2si_ftype_v2si_v2si;
- break;
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- case V1DImode:
- type = v1di_ftype_v1di_v1di;
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Override for comparisons. */
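- /* The mask compares produce an all-ones/all-zeros element mask, so
-    the result type is an integer vector of the same width as the
-    float operands.  */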
- if (d->icode == CODE_FOR_sse_maskcmpv4sf3
- || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
- type = v4si_ftype_v4sf_v4sf;
-
- if (d->icode == CODE_FOR_sse2_maskcmpv2df3
- || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
- type = v2di_ftype_v2df_v2df;
-
- def_builtin (d->mask, d->name, type, d->code);
- }
- /* APPLE LOCAL begin mainline */
- /* Add all builtins that are more or less simple operations on one operand.  */
- for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- {
- enum machine_mode mode;
- tree type;
-
- if (d->name == 0)
- continue;
- mode = insn_data[d->icode].operand[1].mode;
-
- switch (mode)
- {
- case V16QImode:
- type = v16qi_ftype_v16qi;
- break;
- case V8HImode:
- type = v8hi_ftype_v8hi;
- break;
- case V4SImode:
- type = v4si_ftype_v4si;
- break;
- case V2DFmode:
- type = v2df_ftype_v2df;
- break;
- case V4SFmode:
- type = v4sf_ftype_v4sf;
- break;
- case V8QImode:
- type = v8qi_ftype_v8qi;
- break;
- case V4HImode:
- type = v4hi_ftype_v4hi;
- break;
- case V2SImode:
- type = v2si_ftype_v2si;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- def_builtin (d->mask, d->name, type, d->code);
- }
- /* APPLE LOCAL end mainline */
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* pcmpestr[im] insns. */
- for (i = 0, d = bdesc_pcmpestr;
- i < ARRAY_SIZE (bdesc_pcmpestr);
- i++, d++)
- {
- if (d->code == IX86_BUILTIN_PCMPESTRM128)
- ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
- else
- ftype = int_ftype_v16qi_int_v16qi_int_int;
- def_builtin (d->mask, d->name, ftype, d->code);
- }
-
- /* pcmpistr[im] insns. */
- for (i = 0, d = bdesc_pcmpistr;
- i < ARRAY_SIZE (bdesc_pcmpistr);
- i++, d++)
- {
- if (d->code == IX86_BUILTIN_PCMPISTRM128)
- ftype = v16qi_ftype_v16qi_v16qi_int;
- else
- ftype = int_ftype_v16qi_v16qi_int;
- def_builtin (d->mask, d->name, ftype, d->code);
- }
- /* APPLE LOCAL end 5612787 mainline sse4 */
- /* Add the remaining MMX insns with somewhat more complicated types. */
- def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSLLW);
- def_builtin (MASK_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
- def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSLLD);
- def_builtin (MASK_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
- def_builtin (MASK_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
- def_builtin (MASK_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
-
- def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRLW);
- def_builtin (MASK_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
- def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRLD);
- def_builtin (MASK_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
- def_builtin (MASK_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
- def_builtin (MASK_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
-
- def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v1di, IX86_BUILTIN_PSRAW);
- def_builtin (MASK_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
- def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v1di, IX86_BUILTIN_PSRAD);
- def_builtin (MASK_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
-
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
- def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
-
- /* APPLE LOCAL 4299257 */
- /* comi insns. */
- for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
- if (d->mask == MASK_SSE2)
- def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
- else
- def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
-
- /* APPLE LOCAL begin 4299257 */
- /* ucomi insns. */
- for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
- if (d->mask == MASK_SSE2)
- def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
- else
- def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
- /* APPLE LOCAL end 4299257 */
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* ptest insns. */
- for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
- def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
- def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
-
- def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
- def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
- def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
- def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
- def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
- def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
- def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
- def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
- def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
- def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
- def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
-
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
-
- def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
- def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
-
- def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
- def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
- def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
- def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
-
- def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
- def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pv1di_v1di, IX86_BUILTIN_MOVNTQ);
-
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
-
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
-
- def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
- def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
- def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
- def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
- def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
- def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
-
- def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
-
- /* Original 3DNow! */
- def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
- def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
-
- /* 3DNow! extension as used in the Athlon CPU. */
- def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
- def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
- def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
- def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
- def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
- def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
-
- /* SSE2 */
- def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
- def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
- def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
- def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
- def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
- def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
- def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
- def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
- def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
- def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
- def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
- def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
- def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
-
- /* APPLE LOCAL 4656532 use V1DImode for _m64 */
- def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
- def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
- def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
- /* APPLE LOCAL 5919583 */
- def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128_BYTESHIFT);
- def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
- def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
- /* APPLE LOCAL 5919583 */
- def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128_byteshift", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128_BYTESHIFT);
- def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
- def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
-
- def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
-
- /* Prescott New Instructions. */
- def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
- void_ftype_pcvoid_unsigned_unsigned,
- IX86_BUILTIN_MONITOR);
- def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
- void_ftype_unsigned_unsigned,
- IX86_BUILTIN_MWAIT);
- def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
- v4sf_ftype_v4sf,
- IX86_BUILTIN_MOVSHDUP);
- def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
- v4sf_ftype_v4sf,
- IX86_BUILTIN_MOVSLDUP);
- def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
- v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
-
- /* APPLE LOCAL begin 4099020 */
- ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
- def_builtin (MASK_SSE, "__builtin_ia32_movqv4si", ftype, IX86_BUILTIN_MOVQ);
- ftype = build_function_type_list (V4SI_type_node, pv2si_type_node, NULL_TREE);
- def_builtin (MASK_SSE, "__builtin_ia32_loadlv4si", ftype, IX86_BUILTIN_LOADQ);
- ftype = build_function_type_list (void_type_node, pv2si_type_node, V4SI_type_node, NULL_TREE);
- def_builtin (MASK_SSE, "__builtin_ia32_storelv4si", ftype, IX86_BUILTIN_STOREQ);
- /* APPLE LOCAL end 4099020 */
-
- /* APPLE LOCAL begin 4656532 */
- /* Merom New Instructions. */
- def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
- v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
- def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", v1di_ftype_v1di_v1di_int,
- IX86_BUILTIN_PALIGNR);
-
- /* APPLE LOCAL end 4656532 */
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* SSE4.1. */
- def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
- def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
- def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
- def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
- def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
-
- /* SSE4.2. */
- ftype = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- unsigned_char_type_node,
- NULL_TREE);
- def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
- ftype = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- short_unsigned_type_node,
- NULL_TREE);
- def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
- ftype = build_function_type_list (unsigned_type_node,
- unsigned_type_node,
- unsigned_type_node,
- NULL_TREE);
- def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
- ftype = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- NULL_TREE);
- def_builtin (MASK_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
-
-  /* AMDFAM10 SSE4A new built-ins.  */
- def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
- def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
- def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
- def_builtin (MASK_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
- def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
- def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
- /* APPLE LOCAL end 5612787 mainline sse4 */
- /* Access to the vec_init patterns. */
- ftype = build_function_type_list (V2SI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
- ftype, IX86_BUILTIN_VEC_INIT_V2SI);
-
- ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
- short_integer_type_node,
- short_integer_type_node,
- short_integer_type_node, NULL_TREE);
- def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
- ftype, IX86_BUILTIN_VEC_INIT_V4HI);
-
- ftype = build_function_type_list (V8QI_type_node, char_type_node,
- char_type_node, char_type_node,
- char_type_node, char_type_node,
- char_type_node, char_type_node,
- char_type_node, NULL_TREE);
- def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
- ftype, IX86_BUILTIN_VEC_INIT_V8QI);
-
- /* Access to the vec_extract patterns. */
- ftype = build_function_type_list (double_type_node, V2DF_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
- ftype, IX86_BUILTIN_VEC_EXT_V2DF);
-
- ftype = build_function_type_list (long_long_integer_type_node,
- V2DI_type_node, integer_type_node,
- NULL_TREE);
- def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
- ftype, IX86_BUILTIN_VEC_EXT_V2DI);
-
- ftype = build_function_type_list (float_type_node, V4SF_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
- ftype, IX86_BUILTIN_VEC_EXT_V4SF);
-
- ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
- ftype, IX86_BUILTIN_VEC_EXT_V4SI);
-
- /* APPLE LOCAL begin radar 4469713 */
-  /* The return type of the builtin function should be an unsigned type
-     instead of a signed one.  */
- ftype = build_function_type_list (unsigned_intHI_type_node, V8HI_type_node,
- /* APPLE LOCAL end radar 4469713 */
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
- ftype, IX86_BUILTIN_VEC_EXT_V8HI);
-
- ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
- ftype, IX86_BUILTIN_VEC_EXT_V4HI);
-
- ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
- ftype, IX86_BUILTIN_VEC_EXT_V2SI);
-
- ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- /* Access to the vec_set patterns. */
- ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
- intDI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
-
- ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
- float_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
-
- ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
- intSI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
- intHI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
- ftype, IX86_BUILTIN_VEC_SET_V8HI);
-
- ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
- intHI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
- ftype, IX86_BUILTIN_VEC_SET_V4HI);
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
- intQI_type_node,
- integer_type_node, NULL_TREE);
- def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-}
-
-/* Errors in the source file can cause expand_expr to return const0_rtx
- where we expect a vector. To avoid crashing, use one of the vector
- clear instructions. */
-static rtx
-safe_vector_operand (rtx x, enum machine_mode mode)
-{
- if (x == const0_rtx)
- x = CONST0_RTX (mode);
- return x;
-}
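-
-/* A minimal usage sketch (illustrative only; the expanders below do this
-   for real):
-
-     op0 = safe_vector_operand (op0, V4SFmode);
-
-   turns a const0_rtx produced by an erroneous source expression into
-   CONST0_RTX (V4SFmode), a whole-vector zero that the insn predicates
-   can accept.  */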
-
-/* APPLE LOCAL begin 5612787 mainline sse4 */
-/* Subroutine of ix86_expand_builtin to take care of SSE insns with
-   4 operands.  The third argument must be an immediate that fits in
-   8 bits, or the xmm0 register.  */
-
-static rtx
-ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (exp);
- tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[icode].operand[1].mode;
- enum machine_mode mode2 = insn_data[icode].operand[2].mode;
- enum machine_mode mode3 = insn_data[icode].operand[3].mode;
-
- if (VECTOR_MODE_P (mode1))
- op0 = safe_vector_operand (op0, mode1);
- if (VECTOR_MODE_P (mode2))
- op1 = safe_vector_operand (op1, mode2);
- if (VECTOR_MODE_P (mode3))
- op2 = safe_vector_operand (op2, mode3);
-
- if (optimize
- || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if ((optimize && !register_operand (op1, mode2))
- || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
- op1 = copy_to_mode_reg (mode2, op1);
-
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
- switch (icode)
- {
- case CODE_FOR_sse4_1_blendvpd:
- case CODE_FOR_sse4_1_blendvps:
- case CODE_FOR_sse4_1_pblendvb:
- op2 = copy_to_mode_reg (mode3, op2);
- break;
-
- case CODE_FOR_sse4_1_roundsd:
- case CODE_FOR_sse4_1_roundss:
- error ("the third argument must be a 4-bit immediate");
- return const0_rtx;
-
- default:
- error ("the third argument must be an 8-bit immediate");
- return const0_rtx;
- }
-
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
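-
-/* Hedged caller-side sketch (assumed user code, not from this file;
-   builtin names as registered earlier in this file):
-
-     r = __builtin_ia32_blendvpd (a, b, mask);    blend: mask in xmm0
-     r = __builtin_ia32_roundsd (a, b, 0x3);      round: 4-bit immediate
-
-   A third argument that fails the operand predicate falls into the
-   error paths of the switch above.  */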
-
-/* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
-
-static rtx
-ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (exp);
- tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
-
- if (optimize
- || !target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- {
- op1 = copy_to_reg (op1);
- op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
- }
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-/* APPLE LOCAL end 5612787 mainline sse4 */
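-
-/* Hedged caller-side sketch (assumed user code, not from this file):
-
-     unsigned int crc = 0;
-     crc = __builtin_ia32_crc32qi (crc, byte);
-     crc = __builtin_ia32_crc32si (crc, word);
-
-   Each call maps onto an SSE4.2 crc32 instruction; a second operand
-   that fails the predicate is first narrowed via simplify_gen_subreg.  */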
-
-/* Subroutine of ix86_expand_builtin to take care of binop insns. */
-
-static rtx
-ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
-{
- rtx pat, xops[3];
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (GET_MODE (op1) == SImode && mode1 == TImode)
- {
- rtx x = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_loadd (x, op1));
- op1 = gen_lowpart (TImode, x);
- }
-
- /* The insn must want input operands in the same modes as the
- result. */
- gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
-
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- /* ??? Using ix86_fixup_binary_operands is problematic when
- we've got mismatched modes. Fake it. */
-
- xops[0] = target;
- xops[1] = op0;
- xops[2] = op1;
-
- if (tmode == mode0 && tmode == mode1)
- {
- target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
- op0 = xops[1];
- op1 = xops[2];
- }
- else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
- {
- op0 = force_reg (mode0, op0);
- op1 = force_reg (mode1, op1);
- target = gen_reg_rtx (tmode);
- }
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of stores. */
-
-static rtx
-ix86_expand_store_builtin (enum insn_code icode, tree arglist)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[icode].operand[1].mode;
-
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (op0, op1);
- if (pat)
- emit_insn (pat);
- return 0;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of unop insns. */
-
-static rtx
-ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
- rtx target, int do_load)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- rtx op0 = expand_normal (arg0);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- if (do_load)
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- else
- {
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
-
- if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- }
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- switch (icode)
- {
- case CODE_FOR_sse4_1_roundpd:
- case CODE_FOR_sse4_1_roundps:
- {
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- {
- error ("the second argument must be a 4-bit immediate");
- return const0_rtx;
- }
- pat = GEN_FCN (icode) (target, op0, op1);
- }
- break;
- default:
- pat = GEN_FCN (icode) (target, op0);
- break;
- }
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
- sqrtss, rsqrtss, rcpss. */
-
-static rtx
-ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- rtx op1, op0 = expand_normal (arg0);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
-
- if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
-
- op1 = op0;
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
- op1 = copy_to_mode_reg (mode0, op1);
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
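-
-/* Hedged caller-side sketch (assumed xmmintrin.h-style usage):
-
-     __m128 r = __builtin_ia32_rcpss (a);
-
-   Only the low element is approximated; duplicating the operand
-   (op1 = op0 above) supplies the pass-through upper elements.  */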
-
-/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
-
-static rtx
-ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
- rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2;
- enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- /* Swap operands if we have a comparison that isn't available in
- hardware. */
- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
- {
- rtx tmp = gen_reg_rtx (mode1);
- emit_move_insn (tmp, op1);
- op1 = op0;
- op0 = tmp;
- }
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if ((optimize && !register_operand (op0, mode0))
- || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
- pat = GEN_FCN (d->icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of comi insns. */
-
-static rtx
-ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
- rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2;
- enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- /* Swap operands if we have a comparison that isn't available in
- hardware. */
- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
- {
- rtx tmp = op1;
- op1 = op0;
- op0 = tmp;
- }
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
- pat = GEN_FCN (d->icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- emit_insn (gen_rtx_SET (VOIDmode,
- gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
-}
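-
-/* Hedged caller-side sketch (assumed xmmintrin.h-style usage; comi
-   builtin names are registered from bdesc_comi earlier in this file):
-
-     int eq = __builtin_ia32_comieq (a, b);
-
-   The flag result is read back through the STRICT_LOW_PART store into
-   the QImode subreg constructed above.  */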
-
-/* APPLE LOCAL begin 4299257 */
-/* Subroutine of ix86_expand_builtin to take care of ucomi insns. */
-
-static rtx
-ix86_expand_sse_ucomi (const struct builtin_description *d, tree arglist,
- rtx target)
-{
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum machine_mode scalar_mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- /* Swap operands if we have a comparison that isn't available in
- hardware. */
- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
- {
- rtx tmp = op1;
- op1 = op0;
- op0 = tmp;
- }
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- gcc_assert (mode0 == V4SFmode || mode0 == V2DFmode);
- gcc_assert (mode1 == V4SFmode || mode1 == V2DFmode);
-
- scalar_mode = (mode0 == V4SFmode) ? SFmode : DFmode;
- op0 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode0, op0), 0);
- op1 = gen_rtx_SUBREG (scalar_mode, copy_to_mode_reg (mode1, op1), 0);
-
- ix86_compare_op0 = op0;
- ix86_compare_op1 = op1;
- if (ix86_expand_setcc (comparison, target))
- return SUBREG_REG (target);
-
- return NULL_RTX;
-}
-/* APPLE LOCAL end 4299257 */
-
-/* APPLE LOCAL begin 5612787 mainline sse4 */
-/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
-
-static rtx
-ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (exp);
- tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (d->icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- emit_insn (gen_rtx_SET (VOIDmode,
- gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
-}
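-
-/* Hedged caller-side sketch (assumed smmintrin.h-style usage; builtin
-   names as registered from bdesc_ptest earlier in this file):
-
-     int all_zero = __builtin_ia32_ptestz128 (a, b);
-
-   PTEST only sets flags, so the value is likewise recovered through
-   the STRICT_LOW_PART idiom above.  */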
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
-
-static rtx
-ix86_expand_sse_pcmpestr (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (exp);
- tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
- tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp))));
- tree arg4 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (exp)))));
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- rtx op3 = expand_normal (arg3);
- rtx op4 = expand_normal (arg4);
- enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modei3 = insn_data[d->icode].operand[3].mode;
- modev4 = insn_data[d->icode].operand[4].mode;
- modei5 = insn_data[d->icode].operand[5].mode;
- modeimm = insn_data[d->icode].operand[6].mode;
-
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev4))
- op2 = safe_vector_operand (op2, modev4);
-
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
- op1 = copy_to_mode_reg (modei3, op1);
- if ((optimize && !register_operand (op2, modev4))
- || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
- op2 = copy_to_mode_reg (modev4, op2);
- if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
- op3 = copy_to_mode_reg (modei5, op3);
-
- if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
- {
-      error ("the fifth argument must be an 8-bit immediate");
- return const0_rtx;
- }
-
- if (d->code == IX86_BUILTIN_PCMPESTRI128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
- target = gen_reg_rtx (tmode0);
-
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
- }
- else if (d->code == IX86_BUILTIN_PCMPESTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
- target = gen_reg_rtx (tmode1);
-
- scratch0 = gen_reg_rtx (tmode0);
-
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
- }
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((enum machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
-}
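-
-/* Hedged caller-side sketch (assumed smmintrin.h-style usage):
-
-     int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);
-
-   The fifth argument must be an 8-bit immediate, as enforced above;
-   the *i variants return the index, the *m variants the mask.  */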
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
-
-static rtx
-ix86_expand_sse_pcmpistr (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = TREE_VALUE (exp);
- tree arg1 = TREE_VALUE (TREE_CHAIN (exp));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (exp)));
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modev3 = insn_data[d->icode].operand[3].mode;
- modeimm = insn_data[d->icode].operand[4].mode;
-
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev3))
- op1 = safe_vector_operand (op1, modev3);
-
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if ((optimize && !register_operand (op1, modev3))
- || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
- op1 = copy_to_mode_reg (modev3, op1);
-
- if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
- {
-      error ("the third argument must be an 8-bit immediate");
- return const0_rtx;
- }
-
- if (d->code == IX86_BUILTIN_PCMPISTRI128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
- target = gen_reg_rtx (tmode0);
-
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
- }
- else if (d->code == IX86_BUILTIN_PCMPISTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
- target = gen_reg_rtx (tmode1);
-
- scratch0 = gen_reg_rtx (tmode0);
-
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
- }
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((enum machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
-}
-/* APPLE LOCAL end 5612787 mainline sse4 */
-
-/* Return the integer constant in ARG. Constrain it to be in the range
- of the subparts of VEC_TYPE; issue an error if not. */
-
-static int
-get_element_number (tree vec_type, tree arg)
-{
- unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
-
- if (!host_integerp (arg, 1)
- || (elt = tree_low_cst (arg, 1), elt > max))
- {
- error ("selector must be an integer constant in the range 0..%wi", max);
- return 0;
- }
-
- return elt;
-}
-
-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
- ix86_expand_vector_init. We DO have language-level syntax for this, in
- the form of (type){ init-list }. Except that since we can't place emms
- instructions from inside the compiler, we can't allow the use of MMX
- registers unless the user explicitly asks for it. So we do *not* define
- vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
-   we have builtins invoked by mmintrin.h that give us license to emit
- these sorts of instructions. */
-
-static rtx
-ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
-{
- enum machine_mode tmode = TYPE_MODE (type);
- enum machine_mode inner_mode = GET_MODE_INNER (tmode);
- int i, n_elt = GET_MODE_NUNITS (tmode);
- rtvec v = rtvec_alloc (n_elt);
-
- gcc_assert (VECTOR_MODE_P (tmode));
-
- for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
- {
- rtx x = expand_normal (TREE_VALUE (arglist));
- RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
- }
-
- gcc_assert (arglist == NULL);
-
- if (!target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
-
- ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
- return target;
-}
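-
-/* Hedged caller-side sketch (assumed mmintrin.h-style usage, per the
-   comment above):
-
-     __m64 v = (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
-
-   The argument list is walked element by element into an rtvec and
-   handed to ix86_expand_vector_init.  */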
-
-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
- ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
- had a language-level syntax for referencing vector elements. */
-
-static rtx
-ix86_expand_vec_ext_builtin (tree arglist, rtx target)
-{
- enum machine_mode tmode, mode0;
- tree arg0, arg1;
- int elt;
- rtx op0;
-
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-
- op0 = expand_normal (arg0);
- elt = get_element_number (TREE_TYPE (arg0), arg1);
-
- tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
- mode0 = TYPE_MODE (TREE_TYPE (arg0));
- gcc_assert (VECTOR_MODE_P (mode0));
-
- op0 = force_reg (mode0, op0);
-
- if (optimize || !target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
-
- ix86_expand_vector_extract (true, target, op0, elt);
-
- return target;
-}
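-
-/* Hedged caller-side sketch (assumed user code, not from this file):
-
-     float lane0 = __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 0);
-
-   The selector must be a constant in range; get_element_number
-   diagnoses anything else.  */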
-
-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
- ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
- a language-level syntax for referencing vector elements. */
-
-static rtx
-ix86_expand_vec_set_builtin (tree arglist)
-{
- enum machine_mode tmode, mode1;
- tree arg0, arg1, arg2;
- int elt;
- rtx op0, op1, target;
-
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
-
- tmode = TYPE_MODE (TREE_TYPE (arg0));
- mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
- gcc_assert (VECTOR_MODE_P (tmode));
-
- op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
- elt = get_element_number (TREE_TYPE (arg0), arg2);
-
- if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
- op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
-
- op0 = force_reg (tmode, op0);
- op1 = force_reg (mode1, op1);
-
- /* OP0 is the source of these builtin functions and shouldn't be
-     modified.  Create a copy, use it, and return it as the target.  */
- target = gen_reg_rtx (tmode);
- emit_move_insn (target, op0);
- ix86_expand_vector_set (true, target, op1, elt);
-
- return target;
-}
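-
-/* Hedged caller-side sketch (assumed user code, not from this file):
-
-     v = (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) v, x, 3);
-
-   Because the expander copies OP0 into a fresh register first, the
-   builtin has no side effect on its first argument.  */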
-
-/* Expand an expression EXP that calls a built-in function,
- with result going to TARGET if that's convenient
- (and in mode MODE if that's convenient).
- SUBTARGET may be used as the target for computing one of EXP's operands.
- IGNORE is nonzero if the value is to be ignored. */
-
-static rtx
-ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- int ignore ATTRIBUTE_UNUSED)
-{
- const struct builtin_description *d;
- size_t i;
- enum insn_code icode;
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- tree arg0, arg1, arg2, arg3;
- rtx op0, op1, op2, op3, pat;
- /* APPLE LOCAL ssse3 */
- enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
-
- switch (fcode)
- {
- case IX86_BUILTIN_EMMS:
- emit_insn (gen_mmx_emms ());
- return 0;
-
- case IX86_BUILTIN_SFENCE:
- emit_insn (gen_sse_sfence ());
- return 0;
-
- case IX86_BUILTIN_MASKMOVQ:
- case IX86_BUILTIN_MASKMOVDQU:
- icode = (fcode == IX86_BUILTIN_MASKMOVQ
- ? CODE_FOR_mmx_maskmovq
- : CODE_FOR_sse2_maskmovdqu);
- /* Note the arg order is different from the operand order. */
- arg1 = TREE_VALUE (arglist);
- arg2 = TREE_VALUE (TREE_CHAIN (arglist));
- arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
- op0 = force_reg (Pmode, op0);
- op0 = gen_rtx_MEM (mode1, op0);
-
- if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- pat = GEN_FCN (icode) (op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return 0;
-
- case IX86_BUILTIN_SQRTSS:
- return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
- case IX86_BUILTIN_RSQRTSS:
- return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
- case IX86_BUILTIN_RCPSS:
- return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
-
- /* APPLE LOCAL begin 4099020 */
- case IX86_BUILTIN_LOADQ:
- return ix86_expand_unop_builtin (CODE_FOR_sse_loadqv4si, arglist, target, 1);
-
- case IX86_BUILTIN_MOVQ:
- return ix86_expand_unop_builtin (CODE_FOR_sse_movqv4si, arglist, target, 0);
-
- case IX86_BUILTIN_STOREQ:
- return ix86_expand_store_builtin (CODE_FOR_sse_storeqv4si, arglist);
- /* APPLE LOCAL end 4099020 */
-
- case IX86_BUILTIN_LOADUPS:
- return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
-
- case IX86_BUILTIN_STOREUPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
-
- case IX86_BUILTIN_LOADHPS:
- case IX86_BUILTIN_LOADLPS:
- case IX86_BUILTIN_LOADHPD:
- case IX86_BUILTIN_LOADLPD:
- icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
- : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
- : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
- : CODE_FOR_sse2_loadlpd);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
-
- op0 = force_reg (mode0, op0);
- op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_STOREHPS:
- case IX86_BUILTIN_STORELPS:
- icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
- : CODE_FOR_sse_storelps);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- op1 = force_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return const0_rtx;
-
- case IX86_BUILTIN_MOVNTPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
- case IX86_BUILTIN_MOVNTQ:
- /* APPLE LOCAL 4656532 use V1DImode for _m64 */
- return ix86_expand_store_builtin (CODE_FOR_sse_movntv1di, arglist);
-
- case IX86_BUILTIN_LDMXCSR:
- op0 = expand_normal (TREE_VALUE (arglist));
- target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
- emit_move_insn (target, op0);
- emit_insn (gen_sse_ldmxcsr (target));
- return 0;
-
- case IX86_BUILTIN_STMXCSR:
- target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
- emit_insn (gen_sse_stmxcsr (target));
- return copy_to_mode_reg (SImode, target);
-
- case IX86_BUILTIN_SHUFPS:
- case IX86_BUILTIN_SHUFPD:
- icode = (fcode == IX86_BUILTIN_SHUFPS
- ? CODE_FOR_sse_shufps
- : CODE_FOR_sse2_shufpd);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- {
-	  error ("shuffle mask must be an immediate");
- return gen_reg_rtx (tmode);
- }
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_PSHUFW:
- case IX86_BUILTIN_PSHUFD:
- case IX86_BUILTIN_PSHUFHW:
- case IX86_BUILTIN_PSHUFLW:
- icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
- : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
- : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
- : CODE_FOR_mmx_pshufw);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
-	  error ("shuffle mask must be an immediate");
- return const0_rtx;
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_PSLLWI128:
- icode = CODE_FOR_ashlv8hi3;
- goto do_pshifti;
- case IX86_BUILTIN_PSLLDI128:
- icode = CODE_FOR_ashlv4si3;
- goto do_pshifti;
- case IX86_BUILTIN_PSLLQI128:
- icode = CODE_FOR_ashlv2di3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRAWI128:
- icode = CODE_FOR_ashrv8hi3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRADI128:
- icode = CODE_FOR_ashrv4si3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRLWI128:
- icode = CODE_FOR_lshrv8hi3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRLDI128:
- icode = CODE_FOR_lshrv4si3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRLQI128:
- icode = CODE_FOR_lshrv2di3;
- goto do_pshifti;
- do_pshifti:
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-
- /* APPLE LOCAL begin radar 5543378 mainline candidate */
- if (GET_CODE (op1) == CONST_INT)
- {
- if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
- op1 = GEN_INT (255);
- }
- else
- {
- mode2 = insn_data[icode].operand[2].mode;
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- op1 = copy_to_reg (op1);
- if (GET_MODE (op1) != mode2)
- op1 = convert_to_mode (mode2, op1, 0);
- }
- }
- /* APPLE LOCAL end radar 5543378 mainline candidate */
-
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_reg (op0);
-
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_PSLLW128:
- icode = CODE_FOR_ashlv8hi3;
- goto do_pshift;
- case IX86_BUILTIN_PSLLD128:
- icode = CODE_FOR_ashlv4si3;
- goto do_pshift;
- case IX86_BUILTIN_PSLLQ128:
- icode = CODE_FOR_ashlv2di3;
- goto do_pshift;
- case IX86_BUILTIN_PSRAW128:
- icode = CODE_FOR_ashrv8hi3;
- goto do_pshift;
- case IX86_BUILTIN_PSRAD128:
- icode = CODE_FOR_ashrv4si3;
- goto do_pshift;
- case IX86_BUILTIN_PSRLW128:
- icode = CODE_FOR_lshrv8hi3;
- goto do_pshift;
- case IX86_BUILTIN_PSRLD128:
- icode = CODE_FOR_lshrv4si3;
- goto do_pshift;
- case IX86_BUILTIN_PSRLQ128:
- icode = CODE_FOR_lshrv2di3;
- goto do_pshift;
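-    /* Unlike do_pshifti above, do_pshift handles the forms whose shift
-       count is the low 64 bits of an XMM operand; op1 is reinterpreted
-       as TImode below so it can feed the vector shift pattern
-       directly.  */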
- do_pshift:
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_reg (op0);
-
- op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
- op1 = copy_to_reg (op1);
-
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- /* APPLE LOCAL begin 5919583 */
- case IX86_BUILTIN_PSLLDQI128:
- case IX86_BUILTIN_PSRLDQI128:
- case IX86_BUILTIN_PSLLDQI128_BYTESHIFT:
- case IX86_BUILTIN_PSRLDQI128_BYTESHIFT:
- icode = ((fcode == IX86_BUILTIN_PSLLDQI128
- || fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT)
- ? CODE_FOR_sse2_ashlti3
- : CODE_FOR_sse2_lshrti3);
- /* APPLE LOCAL end 5919583 */
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
-      /* APPLE LOCAL begin 5919583 */
- if (! CONST_INT_P (op1))
- {
- error ("shift must be an immediate");
- return const0_rtx;
- }
- /* The _mm_srli_si128/_mm_slli_si128 primitives are defined with
- a byte-shift count; inside of GCC, we prefer to specify the
- width of a shift in bits. The original non-BYTESHIFT
- primitives were problematic due to the "*8" in their macro
- bodies; we have moved the "*8" here to resolve this. The
- original builtins are still supported because many developers
- rely upon them. */
- if (fcode == IX86_BUILTIN_PSLLDQI128_BYTESHIFT
- || fcode == IX86_BUILTIN_PSRLDQI128_BYTESHIFT)
- op1 = gen_rtx_CONST_INT (SImode, INTVAL (op1) * 8);
-      /* APPLE LOCAL end 5919583 */
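-      /* Editorial example: _mm_srli_si128 (x, 2) arrives here as the
-	 BYTESHIFT builtin with op1 == 2 and is rewritten above to a
-	 16-bit TImode shift, i.e. a two-byte PSRLDQ.  */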
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- {
- op0 = copy_to_reg (op0);
- op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
- }
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- error ("shift must be an immediate");
- return const0_rtx;
- }
- target = gen_reg_rtx (V2DImode);
- pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
- op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_FEMMS:
- emit_insn (gen_mmx_femms ());
- return NULL_RTX;
-
- case IX86_BUILTIN_PAVGUSB:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
-
- case IX86_BUILTIN_PF2ID:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
-
- case IX86_BUILTIN_PFACC:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFADD:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFCMPEQ:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFCMPGE:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
-
- case IX86_BUILTIN_PFCMPGT:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFMAX:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFMIN:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFMUL:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFRCP:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
-
- case IX86_BUILTIN_PFRCPIT1:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
-
- case IX86_BUILTIN_PFRCPIT2:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
-
- case IX86_BUILTIN_PFRSQIT1:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
-
- case IX86_BUILTIN_PFRSQRT:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
-
- case IX86_BUILTIN_PFSUB:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFSUBR:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
-
- case IX86_BUILTIN_PI2FD:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
-
- case IX86_BUILTIN_PMULHRW:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
-
- case IX86_BUILTIN_PF2IW:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
-
- case IX86_BUILTIN_PFNACC:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
-
- case IX86_BUILTIN_PFPNACC:
- return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
-
- case IX86_BUILTIN_PI2FW:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
-
- case IX86_BUILTIN_PSWAPDSI:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
-
- case IX86_BUILTIN_PSWAPDSF:
- return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
-
- case IX86_BUILTIN_SQRTSD:
- return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
- case IX86_BUILTIN_LOADUPD:
- return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
- case IX86_BUILTIN_STOREUPD:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
-
- case IX86_BUILTIN_MFENCE:
- emit_insn (gen_sse2_mfence ());
- return 0;
- case IX86_BUILTIN_LFENCE:
- emit_insn (gen_sse2_lfence ());
- return 0;
-
- case IX86_BUILTIN_CLFLUSH:
- arg0 = TREE_VALUE (arglist);
- op0 = expand_normal (arg0);
- icode = CODE_FOR_sse2_clflush;
- if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
-
- emit_insn (gen_sse2_clflush (op0));
- return 0;
-
- case IX86_BUILTIN_MOVNTPD:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
- case IX86_BUILTIN_MOVNTDQ:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
- case IX86_BUILTIN_MOVNTI:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
-
- case IX86_BUILTIN_LOADDQU:
- return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
- case IX86_BUILTIN_STOREDQU:
- return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
-
- case IX86_BUILTIN_MONITOR:
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (Pmode, op0);
- if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
- if (!REG_P (op2))
- op2 = copy_to_mode_reg (SImode, op2);
- if (!TARGET_64BIT)
- emit_insn (gen_sse3_monitor (op0, op1, op2));
- else
- emit_insn (gen_sse3_monitor64 (op0, op1, op2));
- return 0;
-
- case IX86_BUILTIN_MWAIT:
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
- if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
- emit_insn (gen_sse3_mwait (op0, op1));
- return 0;
-
- case IX86_BUILTIN_LDDQU:
- return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
- target, 1);
- /* APPLE LOCAL begin mainline */
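-    /* PALIGNR concatenates its two source operands and extracts a
-       register-width field at a byte offset given by the third operand,
-       which must be an immediate - hence the error below for a
-       non-constant shift.  The _m64 form uses V1DImode (radar 4656532),
-       the 128-bit form V2DImode.  */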
- case IX86_BUILTIN_PALIGNR:
- case IX86_BUILTIN_PALIGNR128:
- if (fcode == IX86_BUILTIN_PALIGNR)
- {
- /* APPLE LOCAL begin 4656532 use V1DImode for _m64 */
- icode = CODE_FOR_ssse3_palignrv1di;
- mode = V1DImode;
- /* APPLE LOCAL end 4656532 use V1DImode for _m64 */
- }
- else
- {
- icode = CODE_FOR_ssse3_palignrti;
- mode = V2DImode;
- }
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
- op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
- mode3 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- {
- op0 = copy_to_reg (op0);
- op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
- }
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- op1 = copy_to_reg (op1);
- op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
- }
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
- {
- error ("shift must be an immediate");
- return const0_rtx;
- }
- target = gen_reg_rtx (mode);
- pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
- op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- /* APPLE LOCAL end mainline */
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- case IX86_BUILTIN_MOVNTDQA:
- return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, arglist,
- target, 1);
-
-    case IX86_BUILTIN_MOVNTSD:
-      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
-
-    case IX86_BUILTIN_MOVNTSS:
-      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
-
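-    /* SSE4a bit-field extract/insert on the low 64 bits of an XMM
-       register.  These are the register-operand forms; the immediate
-       forms (EXTRQI/INSERTQI) below insist on constant index and
-       length operands.  */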
- case IX86_BUILTIN_INSERTQ:
- case IX86_BUILTIN_EXTRQ:
- icode = (fcode == IX86_BUILTIN_EXTRQ
- ? CODE_FOR_sse4a_extrq
- : CODE_FOR_sse4a_insertq);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- op1 = copy_to_mode_reg (mode2, op1);
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return NULL_RTX;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_EXTRQI:
- icode = CODE_FOR_sse4a_extrqi;
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
- mode3 = insn_data[icode].operand[3].mode;
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- error ("index mask must be an immediate");
- return gen_reg_rtx (tmode);
- }
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
- {
- error ("length mask must be an immediate");
- return gen_reg_rtx (tmode);
- }
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return NULL_RTX;
- emit_insn (pat);
- return target;
-
- case IX86_BUILTIN_INSERTQI:
- icode = CODE_FOR_sse4a_insertqi;
-      arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
- mode3 = insn_data[icode].operand[3].mode;
- mode4 = insn_data[icode].operand[4].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
-
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- op1 = copy_to_mode_reg (mode2, op1);
-
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
- {
- error ("index mask must be an immediate");
- return gen_reg_rtx (tmode);
- }
- if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
- {
- error ("length mask must be an immediate");
- return gen_reg_rtx (tmode);
- }
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
- if (! pat)
- return NULL_RTX;
- emit_insn (pat);
- return target;
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- case IX86_BUILTIN_VEC_INIT_V2SI:
- case IX86_BUILTIN_VEC_INIT_V4HI:
- case IX86_BUILTIN_VEC_INIT_V8QI:
- return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
-
- case IX86_BUILTIN_VEC_EXT_V2DF:
- case IX86_BUILTIN_VEC_EXT_V2DI:
- case IX86_BUILTIN_VEC_EXT_V4SF:
- case IX86_BUILTIN_VEC_EXT_V4SI:
- case IX86_BUILTIN_VEC_EXT_V8HI:
- case IX86_BUILTIN_VEC_EXT_V16QI:
- case IX86_BUILTIN_VEC_EXT_V2SI:
- case IX86_BUILTIN_VEC_EXT_V4HI:
- return ix86_expand_vec_ext_builtin (arglist, target);
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- case IX86_BUILTIN_VEC_SET_V2DI:
- case IX86_BUILTIN_VEC_SET_V4SF:
- case IX86_BUILTIN_VEC_SET_V4SI:
- /* APPLE LOCAL end 5612787 mainline sse4 */
- case IX86_BUILTIN_VEC_SET_V8HI:
- case IX86_BUILTIN_VEC_SET_V4HI:
- /* APPLE LOCAL 5612787 mainline sse4 */
- case IX86_BUILTIN_VEC_SET_V16QI:
- return ix86_expand_vec_set_builtin (arglist);
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- case IX86_BUILTIN_INFQ:
- {
- REAL_VALUE_TYPE inf;
- rtx tmp;
-
- real_inf (&inf);
- tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
-
- tmp = validize_mem (force_const_mem (mode, tmp));
-
- if (target == 0)
- target = gen_reg_rtx (mode);
-
- emit_move_insn (target, tmp);
- return target;
- }
-
- case IX86_BUILTIN_FABSQ:
- return ix86_expand_unop_builtin (CODE_FOR_abstf2, arglist, target, 0);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- default:
- break;
- }
-
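-  /* Nothing in the switch above matched; it handles only the irregular
-     builtins.  The regular one-, two-, and three-operand builtins are
-     expanded by scanning the bdesc_* tables below.  */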
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- for (i = 0, d = bdesc_sse_3arg;
- i < ARRAY_SIZE (bdesc_sse_3arg);
- i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_4_operands_builtin (d->icode,
- arglist,
- target);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- {
- /* Compares are treated specially. */
- if (d->icode == CODE_FOR_sse_maskcmpv4sf3
- || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
- || d->icode == CODE_FOR_sse2_maskcmpv2df3
- || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
- return ix86_expand_sse_compare (d, arglist, target);
-
- return ix86_expand_binop_builtin (d->icode, arglist, target);
- }
-
- for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
-
- for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_comi (d, arglist, target);
-
- /* APPLE LOCAL begin 4299257 */
- for (i = 0, d = bdesc_ucomi; i < ARRAY_SIZE (bdesc_ucomi); i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_ucomi (d, arglist, target);
- /* APPLE LOCAL end 4299257 */
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_ptest (d, arglist, target);
-
- for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
- if (d->code == fcode)
- return ix86_expand_crc32 (d->icode, arglist, target);
-
- for (i = 0, d = bdesc_pcmpestr;
- i < ARRAY_SIZE (bdesc_pcmpestr);
- i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_pcmpestr (d, arglist, target);
-
- for (i = 0, d = bdesc_pcmpistr;
- i < ARRAY_SIZE (bdesc_pcmpistr);
- i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_pcmpistr (d, arglist, target);
- /* APPLE LOCAL end 5612787 mainline sse4 */
-
- gcc_unreachable ();
-}
-
-/* Store OPERAND to memory after reload has completed.  This means
-   that we can't easily use assign_stack_local.  */
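-/* Editorial sketch: on x86-64 the psABI red zone is 128 bytes, so for a
-   DImode operand the TARGET_RED_ZONE path below amounts to
-	movq	%rax, -128(%rsp)
-   with no stack-pointer adjustment; the non-red-zone paths push the
-   operand instead, and ix86_free_from_memory later releases the
-   slot.  */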
-rtx
-ix86_force_to_memory (enum machine_mode mode, rtx operand)
-{
- rtx result;
-
- gcc_assert (reload_completed);
- if (TARGET_RED_ZONE)
- {
- result = gen_rtx_MEM (mode,
- gen_rtx_PLUS (Pmode,
- stack_pointer_rtx,
- GEN_INT (-RED_ZONE_SIZE)));
- emit_move_insn (result, operand);
- }
- else if (!TARGET_RED_ZONE && TARGET_64BIT)
- {
- switch (mode)
- {
- case HImode:
- case SImode:
- operand = gen_lowpart (DImode, operand);
- /* FALLTHRU */
- case DImode:
- emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (DImode,
- gen_rtx_PRE_DEC (DImode,
- stack_pointer_rtx)),
- operand));
- break;
- default:
- gcc_unreachable ();
- }
- result = gen_rtx_MEM (mode, stack_pointer_rtx);
- }
- else
- {
- switch (mode)
- {
- case DImode:
- {
- rtx operands[2];
- split_di (&operand, 1, operands, operands + 1);
- emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (SImode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx)),
- operands[1]));
- emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (SImode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx)),
- operands[0]));
- }
- break;
- case HImode:
- /* Store HImodes as SImodes. */
- operand = gen_lowpart (SImode, operand);
- /* FALLTHRU */
- case SImode:
- emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (GET_MODE (operand),
- gen_rtx_PRE_DEC (SImode,
- stack_pointer_rtx)),
- operand));
- break;
- default:
- gcc_unreachable ();
- }
- result = gen_rtx_MEM (mode, stack_pointer_rtx);
- }
- return result;
-}
-
-/* Free the operand from memory.  */
-void
-ix86_free_from_memory (enum machine_mode mode)
-{
- if (!TARGET_RED_ZONE)
- {
- int size;
-
- if (mode == DImode || TARGET_64BIT)
- size = 8;
- else
- size = 4;
-      /* Use LEA to deallocate stack space.  In peephole2 it will be
-	 converted to a pop or add instruction if registers are
-	 available.  */
- emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (size))));
- }
-}
-
-/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
- QImode must go into class Q_REGS.
- Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
- movdf to do mem-to-mem moves through integer regs. */
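-/* Editorial example: for a nonzero constant such as (const_double:DF 1.0),
-   any class that may contain SSE or MMX registers gets NO_REGS below
-   (there is no SSE/MMX load from an immediate), while plain FLOAT_REGS
-   survives to the 80387 branch, which keeps it because fld1 can
-   materialize 1.0 directly (assuming 387 math is in use).  */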
-enum reg_class
-ix86_preferred_reload_class (rtx x, enum reg_class class)
-{
- enum machine_mode mode = GET_MODE (x);
-
- /* We're only allowed to return a subclass of CLASS. Many of the
- following checks fail for NO_REGS, so eliminate that early. */
- if (class == NO_REGS)
- return NO_REGS;
-
- /* All classes can load zeros. */
- if (x == CONST0_RTX (mode))
- return class;
-
- /* Force constants into memory if we are loading a (nonzero) constant into
- an MMX or SSE register. This is because there are no MMX/SSE instructions
- to load from a constant. */
- if (CONSTANT_P (x)
- && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
- return NO_REGS;
-
- /* APPLE LOCAL begin */
- /* MERGE FIXME - ensure that 3501055 is fixed. */
- /* MERGE FIXME - ensure that 4206991 is fixed. */
- /* APPLE LOCAL end */
- /* Prefer SSE regs only, if we can use them for math. */
- if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
- return SSE_CLASS_P (class) ? class : NO_REGS;
-
- /* Floating-point constants need more complex checks. */
- if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
- {
- /* General regs can load everything. */
- if (reg_class_subset_p (class, GENERAL_REGS))
- return class;
-
- /* Floats can load 0 and 1 plus some others. Note that we eliminated
- zero above. We only want to wind up preferring 80387 registers if
- we plan on doing computation with them. */
- if (TARGET_80387
- && standard_80387_constant_p (x))
- {
- /* Limit class to non-sse. */
- if (class == FLOAT_SSE_REGS)
- return FLOAT_REGS;
- if (class == FP_TOP_SSE_REGS)
- return FP_TOP_REG;
- if (class == FP_SECOND_SSE_REGS)
- return FP_SECOND_REG;
- if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
- return class;
- }
-
- return NO_REGS;
- }
-
-  /* Generally when we see PLUS here, it's the function invariant
-     (plus soft-fp const_int), which can only be computed into general
-     regs.  */
- if (GET_CODE (x) == PLUS)
- return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
-
- /* QImode constants are easy to load, but non-constant QImode data
- must go into Q_REGS. */
- if (GET_MODE (x) == QImode && !CONSTANT_P (x))
- {
- if (reg_class_subset_p (class, Q_REGS))
- return class;
- if (reg_class_subset_p (Q_REGS, class))
- return Q_REGS;
- return NO_REGS;
- }
-
- return class;
-}
-
-/* Discourage putting floating-point values in SSE registers unless
- SSE math is being used, and likewise for the 387 registers. */
-enum reg_class
-ix86_preferred_output_reload_class (rtx x, enum reg_class class)
-{
- enum machine_mode mode = GET_MODE (x);
-
- /* Restrict the output reload class to the register bank that we are doing
- math on. If we would like not to return a subset of CLASS, reject this
- alternative: if reload cannot do this, it will still use its choice. */
- if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
- return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
-
- if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
- {
- if (class == FP_TOP_SSE_REGS)
- return FP_TOP_REG;
- else if (class == FP_SECOND_SSE_REGS)
- return FP_SECOND_REG;
- else
- return FLOAT_CLASS_P (class) ? class : NO_REGS;
- }
-
- return class;
-}
-
-/* If we are copying between general and FP registers, we need a memory
-   location.  The same is true for SSE and MMX registers.
-
-   The macro can't work reliably when one of the CLASSES is a class
-   containing registers from multiple units (SSE, MMX, integer).  We
-   avoid this by never combining those units in a single alternative in
-   the machine description.  Ensure that this constraint holds to avoid
-   unexpected surprises.
-
-   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
-   do not enforce these sanity checks.  */
-
-int
-ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
- enum machine_mode mode, int strict)
-{
- if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
- || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
- || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
- || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
- || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
- || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
- {
- gcc_assert (!strict);
- return true;
- }
-
- if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
- return true;
-
-  /* ??? This is a lie.  We do have moves between mmx/general and
-     between mmx/sse2.  But by saying we need secondary memory we
-     discourage the register allocator from using the mmx registers
-     unless needed.  */
- if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
- return true;
-
- if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
- {
- /* SSE1 doesn't have any direct moves from other classes. */
- if (!TARGET_SSE2)
- return true;
-
- /* If the target says that inter-unit moves are more expensive
- than moving through memory, then don't generate them. */
- if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
- return true;
-
- /* Between SSE and general, we have moves no larger than word size. */
- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- return true;
-
- /* ??? For the cost of one register reformat penalty, we could use
- the same instructions to move SFmode and DFmode data, but the
- relevant move patterns don't support those alternatives. */
- if (mode == SFmode || mode == DFmode)
- return true;
- }
-
- return false;
-}
-
-/* Return true if the registers in CLASS cannot represent the change from
- modes FROM to TO. */
-
-bool
-ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
- enum reg_class class)
-{
- if (from == to)
- return false;
-
- /* x87 registers can't do subreg at all, as all values are reformatted
- to extended precision. */
- if (MAYBE_FLOAT_CLASS_P (class))
- return true;
-
- if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
- {
- /* Vector registers do not support QI or HImode loads. If we don't
- disallow a change to these modes, reload will assume it's ok to
- drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
- the vec_dupv4hi pattern. */
- if (GET_MODE_SIZE (from) < 4)
- return true;
-
- /* Vector registers do not support subreg with nonzero offsets, which
- are otherwise valid for integer registers. Since we can't see
- whether we have a nonzero offset from here, prohibit all
- nonparadoxical subregs changing size. */
- if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
- return true;
- }
-
- return false;
-}
-
-/* Return the cost of moving data from a register in class CLASS1 to
- one in class CLASS2.
-
- It is not required that the cost always equal 2 when FROM is the same as TO;
- on some machines it is expensive to move between registers if they are not
- general registers. */
-
-int
-ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
- enum reg_class class2)
-{
-  /* In case we require secondary memory, compute the cost of the store
-     followed by the load.  In order to avoid bad register allocation
-     choices, we need this to be *at least* as high as the symmetric
-     MEMORY_MOVE_COST.  */
-
- if (ix86_secondary_memory_needed (class1, class2, mode, 0))
- {
- int cost = 1;
-
- cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
- MEMORY_MOVE_COST (mode, class1, 1));
- cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
- MEMORY_MOVE_COST (mode, class2, 1));
-
-      /* When copying from a general-purpose register we may emit
-	 multiple stores followed by a single load, causing a
-	 memory-size-mismatch stall.  Count this as an arbitrarily high
-	 cost of 20.  */
- if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
- cost += 20;
-
- /* In the case of FP/MMX moves, the registers actually overlap, and we
- have to switch modes in order to treat them differently. */
- if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
- || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
- cost += 20;
-
- return cost;
- }
-
- /* Moves between SSE/MMX and integer unit are expensive. */
- if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
- || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
- return ix86_cost->mmxsse_to_integer;
- if (MAYBE_FLOAT_CLASS_P (class1))
- return ix86_cost->fp_move;
- if (MAYBE_SSE_CLASS_P (class1))
- return ix86_cost->sse_move;
- if (MAYBE_MMX_CLASS_P (class1))
- return ix86_cost->mmx_move;
- return 2;
-}
-
-/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
-
-bool
-ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
-{
-  /* Only the flags register can hold CCmode values, and it can hold
-     nothing but CCmode values.  */
- if (CC_REGNO_P (regno))
- return GET_MODE_CLASS (mode) == MODE_CC;
- if (GET_MODE_CLASS (mode) == MODE_CC
- || GET_MODE_CLASS (mode) == MODE_RANDOM
- || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
- return 0;
- if (FP_REGNO_P (regno))
- return VALID_FP_MODE_P (mode);
- if (SSE_REGNO_P (regno))
- {
- /* We implement the move patterns for all vector modes into and
- out of SSE registers, even when no operation instructions
- are available. */
- return (VALID_SSE_REG_MODE (mode)
- || VALID_SSE2_REG_MODE (mode)
- || VALID_MMX_REG_MODE (mode)
- || VALID_MMX_REG_MODE_3DNOW (mode));
- }
- if (MMX_REGNO_P (regno))
- {
- /* We implement the move patterns for 3DNOW modes even in MMX mode,
- so if the register is available at all, then we can move data of
- the given mode into or out of it. */
- return (VALID_MMX_REG_MODE (mode)
- || VALID_MMX_REG_MODE_3DNOW (mode));
- }
-
- if (mode == QImode)
- {
-      /* Take care with QImode values - they can live in non-QI regs,
-	 but then they cause partial register stalls.  */
- if (regno < 4 || TARGET_64BIT)
- return 1;
- if (!TARGET_PARTIAL_REG_STALL)
- return 1;
- return reload_in_progress || reload_completed;
- }
-  /* We handle both integers and floats in the general purpose registers.  */
- else if (VALID_INT_MODE_P (mode))
- return 1;
- else if (VALID_FP_MODE_P (mode))
- return 1;
- /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
- on to use that value in smaller contexts, this can easily force a
- pseudo to be allocated to GENERAL_REGS. Since this is no worse than
- supporting DImode, allow it. */
- else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
- return 1;
-
- return 0;
-}
-
-/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
- tieable integer mode. */
-
-static bool
-ix86_tieable_integer_mode_p (enum machine_mode mode)
-{
- switch (mode)
- {
- case HImode:
- case SImode:
- return true;
-
- case QImode:
- return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
-
- case DImode:
- /* APPLE LOCAL 5695218 convert int to logical bool */
- return !!TARGET_64BIT;
-
- default:
- return false;
- }
-}
-
-/* Return true if MODE1 is accessible in a register that can hold MODE2
- without copying. That is, all register classes that can hold MODE2
- can also hold MODE1. */
-
-bool
-ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
-{
- if (mode1 == mode2)
- return true;
-
- if (ix86_tieable_integer_mode_p (mode1)
- && ix86_tieable_integer_mode_p (mode2))
- return true;
-
- /* MODE2 being XFmode implies fp stack or general regs, which means we
- can tie any smaller floating point modes to it. Note that we do not
- tie this with TFmode. */
- if (mode2 == XFmode)
- return mode1 == SFmode || mode1 == DFmode;
-
- /* MODE2 being DFmode implies fp stack, general or sse regs, which means
- that we can tie it with SFmode. */
- if (mode2 == DFmode)
- return mode1 == SFmode;
-
- /* If MODE2 is only appropriate for an SSE register, then tie with
- any other mode acceptable to SSE registers. */
- if (GET_MODE_SIZE (mode2) >= 8
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
-
- /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
- with any other mode acceptable to MMX registers. */
- if (GET_MODE_SIZE (mode2) == 8
- && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
- return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
-
- return false;
-}
-
-/* Return the cost of moving data of mode M between a
- register and memory. A value of 2 is the default; this cost is
- relative to those in `REGISTER_MOVE_COST'.
-
- If moving between registers and memory is more expensive than
- between two registers, you should define this macro to express the
- relative cost.
-
-   Also model the increased cost of moving QImode registers in
-   non-Q_REGS classes.  */
-int
-ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
-{
- if (FLOAT_CLASS_P (class))
- {
- int index;
- switch (mode)
- {
- case SFmode:
- index = 0;
- break;
- case DFmode:
- index = 1;
- break;
- case XFmode:
- index = 2;
- break;
- default:
- return 100;
- }
- return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
- }
- if (SSE_CLASS_P (class))
- {
- int index;
- switch (GET_MODE_SIZE (mode))
- {
- case 4:
- index = 0;
- break;
- case 8:
- index = 1;
- break;
- case 16:
- index = 2;
- break;
- default:
- return 100;
- }
- return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
- }
- if (MMX_CLASS_P (class))
- {
- int index;
- switch (GET_MODE_SIZE (mode))
- {
- case 4:
- index = 0;
- break;
- case 8:
- index = 1;
- break;
- default:
- return 100;
- }
- return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
- }
- switch (GET_MODE_SIZE (mode))
- {
- case 1:
- if (in)
- return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
- : ix86_cost->movzbl_load);
- else
- return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
- : ix86_cost->int_store[0] + 4);
- break;
- case 2:
- return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
- default:
-      /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
- if (mode == TFmode)
- mode = XFmode;
- return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
- * (((int) GET_MODE_SIZE (mode)
- + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
- }
-}
-
-/* Compute a (partial) cost for rtx X. Return true if the complete
- cost has been computed, and false if subexpressions should be
- scanned. In either case, *TOTAL contains the cost result. */
-
-static bool
-ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
-{
- enum machine_mode mode = GET_MODE (x);
-
- switch (code)
- {
- case CONST_INT:
- case CONST:
- case LABEL_REF:
- case SYMBOL_REF:
- if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
- *total = 3;
- else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
- *total = 2;
- else if (flag_pic && SYMBOLIC_CONST (x)
- && (!TARGET_64BIT
-		   || (GET_CODE (x) != LABEL_REF
- && (GET_CODE (x) != SYMBOL_REF
- || !SYMBOL_REF_LOCAL_P (x)))))
- *total = 1;
- else
- *total = 0;
- return true;
-
- case CONST_DOUBLE:
- if (mode == VOIDmode)
- *total = 0;
- else
- switch (standard_80387_constant_p (x))
- {
- case 1: /* 0.0 */
- *total = 1;
- break;
- default: /* Other constants */
- *total = 2;
- break;
- case 0:
- case -1:
- /* Start with (MEM (SYMBOL_REF)), since that's where
- it'll probably end up. Add a penalty for size. */
- *total = (COSTS_N_INSNS (1)
- + (flag_pic != 0 && !TARGET_64BIT)
- + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
- break;
- }
- return true;
-
- case ZERO_EXTEND:
-      /* Zero extension is often completely free on x86_64, so make
-	 it as cheap as possible.  */
- if (TARGET_64BIT && mode == DImode
- && GET_MODE (XEXP (x, 0)) == SImode)
- *total = 1;
- else if (TARGET_ZERO_EXTEND_WITH_AND)
- *total = ix86_cost->add;
- else
- *total = ix86_cost->movzx;
- return false;
-
- case SIGN_EXTEND:
- *total = ix86_cost->movsx;
- return false;
-
- case ASHIFT:
- if (GET_CODE (XEXP (x, 1)) == CONST_INT
- && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
- {
- HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
- if (value == 1)
- {
- *total = ix86_cost->add;
- return false;
- }
- if ((value == 2 || value == 3)
- && ix86_cost->lea <= ix86_cost->shift_const)
- {
- *total = ix86_cost->lea;
- return false;
- }
- }
- /* FALLTHRU */
-
- case ROTATE:
- case ASHIFTRT:
- case LSHIFTRT:
- case ROTATERT:
- if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
- {
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- {
- if (INTVAL (XEXP (x, 1)) > 32)
- *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
- else
- *total = ix86_cost->shift_const * 2;
- }
- else
- {
- if (GET_CODE (XEXP (x, 1)) == AND)
- *total = ix86_cost->shift_var * 2;
- else
- *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
- }
- }
- else
- {
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- *total = ix86_cost->shift_const;
- else
- *total = ix86_cost->shift_var;
- }
- return false;
-
- case MULT:
- if (FLOAT_MODE_P (mode))
- {
- *total = ix86_cost->fmul;
- return false;
- }
- else
- {
- rtx op0 = XEXP (x, 0);
- rtx op1 = XEXP (x, 1);
- int nbits;
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- {
- unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
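-	      /* Population count via VALUE &= VALUE - 1: each iteration
-		 clears the lowest set bit, so NBITS ends up as the
-		 number of set bits in the multiplier.  */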
- for (nbits = 0; value != 0; value &= value - 1)
- nbits++;
- }
- else
- /* This is arbitrary. */
- nbits = 7;
-
- /* Compute costs correctly for widening multiplication. */
-	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
- && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
- == GET_MODE_SIZE (mode))
- {
- int is_mulwiden = 0;
- enum machine_mode inner_mode = GET_MODE (op0);
-
- if (GET_CODE (op0) == GET_CODE (op1))
- is_mulwiden = 1, op1 = XEXP (op1, 0);
- else if (GET_CODE (op1) == CONST_INT)
- {
- if (GET_CODE (op0) == SIGN_EXTEND)
- is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
- == INTVAL (op1);
- else
- is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
- }
-
- if (is_mulwiden)
- op0 = XEXP (op0, 0), mode = GET_MODE (op0);
- }
-
- *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
- + nbits * ix86_cost->mult_bit
- + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
-
- return true;
- }
-
- case DIV:
- case UDIV:
- case MOD:
- case UMOD:
- if (FLOAT_MODE_P (mode))
- *total = ix86_cost->fdiv;
- else
- *total = ix86_cost->divide[MODE_INDEX (mode)];
- return false;
-
- case PLUS:
- if (FLOAT_MODE_P (mode))
- *total = ix86_cost->fadd;
- else if (GET_MODE_CLASS (mode) == MODE_INT
- && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
- {
- if (GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
- && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
- && CONSTANT_P (XEXP (x, 1)))
- {
- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
- if (val == 2 || val == 4 || val == 8)
- {
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
- *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
- outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
- return true;
- }
- }
- else if (GET_CODE (XEXP (x, 0)) == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
- {
- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
- if (val == 2 || val == 4 || val == 8)
- {
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
- return true;
- }
- }
- else if (GET_CODE (XEXP (x, 0)) == PLUS)
- {
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
- return true;
- }
- }
- /* FALLTHRU */
-
- case MINUS:
- if (FLOAT_MODE_P (mode))
- {
- *total = ix86_cost->fadd;
- return false;
- }
- /* FALLTHRU */
-
- case AND:
- case IOR:
- case XOR:
- if (!TARGET_64BIT && mode == DImode)
- {
- *total = (ix86_cost->add * 2
- + (rtx_cost (XEXP (x, 0), outer_code)
- << (GET_MODE (XEXP (x, 0)) != DImode))
- + (rtx_cost (XEXP (x, 1), outer_code)
- << (GET_MODE (XEXP (x, 1)) != DImode)));
- return true;
- }
- /* FALLTHRU */
-
- case NEG:
- if (FLOAT_MODE_P (mode))
- {
- *total = ix86_cost->fchs;
- return false;
- }
- /* FALLTHRU */
-
- case NOT:
- if (!TARGET_64BIT && mode == DImode)
- *total = ix86_cost->add * 2;
- else
- *total = ix86_cost->add;
- return false;
-
- case COMPARE:
- if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
- && XEXP (XEXP (x, 0), 1) == const1_rtx
- && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
- && XEXP (x, 1) == const0_rtx)
- {
- /* This kind of construct is implemented using test[bwl].
- Treat it as if we had an AND. */
- *total = (ix86_cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
- + rtx_cost (const1_rtx, outer_code));
- return true;
- }
- return false;
-
- case FLOAT_EXTEND:
- if (!TARGET_SSE_MATH
- || mode == XFmode
- || (mode == DFmode && !TARGET_SSE2))
-	/* For standard 80387 constants, raise the cost to prevent
-	   compress_float_constant() from generating a load from memory.  */
- switch (standard_80387_constant_p (XEXP (x, 0)))
- {
- case -1:
- case 0:
- *total = 0;
- break;
- case 1: /* 0.0 */
- *total = 1;
- break;
- default:
- *total = (x86_ext_80387_constants & TUNEMASK
- || optimize_size
- ? 1 : 0);
- }
- return false;
-
- case ABS:
- if (FLOAT_MODE_P (mode))
- *total = ix86_cost->fabs;
- return false;
-
- case SQRT:
- if (FLOAT_MODE_P (mode))
- *total = ix86_cost->fsqrt;
- return false;
-
- case UNSPEC:
- if (XINT (x, 1) == UNSPEC_TP)
- *total = 0;
- return false;
-
- default:
- return false;
- }
-}
-
-#if TARGET_MACHO
-
-static int current_machopic_label_num;
-
-/* Given a symbol name and its associated stub, write out the
- definition of the stub. */
-
-void
-machopic_output_stub (FILE *file, const char *symb, const char *stub)
-{
- unsigned int length;
- char *binder_name, *symbol_name, lazy_ptr_name[32];
- int label = ++current_machopic_label_num;
-
- /* For 64-bit we shouldn't get here. */
- gcc_assert (!TARGET_64BIT);
-
- /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
- symb = (*targetm.strip_name_encoding) (symb);
-
- length = strlen (stub);
- binder_name = alloca (length + 32);
- GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
-
- length = strlen (symb);
- symbol_name = alloca (length + 32);
- GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
-
- sprintf (lazy_ptr_name, "L%d$lz", label);
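-  /* Editorial sketch: for a symbol _foo the -mdynamic-no-pic path below
-     emits roughly "jmp *L1$lz" for the stub, a binder fragment that
-     pushes $L1$lz and jumps to dyld_stub_binding_helper, and a lazy
-     pointer L1$lz that initially points at the binder.  */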
-
- /* APPLE LOCAL begin deep branch prediction pic-base */
- /* APPLE LOCAL begin AT&T-style stub 4164563 */
- /* Choose one of four possible sections for this stub. */
- if (MACHOPIC_ATT_STUB)
- switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
- else if (MACHOPIC_PURE)
- /* APPLE LOCAL end AT&T-style stub 4164563 */
- {
- if (TARGET_DEEP_BRANCH_PREDICTION)
- switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
- else
- switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
- }
- else
- /* APPLE LOCAL end deep branch prediction pic-base */
- switch_to_section (darwin_sections[machopic_symbol_stub_section]);
-
- fprintf (file, "%s:\n", stub);
- fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
-
- /* APPLE LOCAL begin use %ecx in stubs 4146993 */
- /* APPLE LOCAL begin deep branch prediction pic-base */
- /* APPLE LOCAL begin AT&T-style stub 4164563 */
- if (MACHOPIC_ATT_STUB)
- {
- fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
- }
- else if (MACHOPIC_PURE)
- /* APPLE LOCAL end AT&T-style stub 4164563 */
- {
- /* PIC stub. */
- if (TARGET_DEEP_BRANCH_PREDICTION)
- {
- /* 25-byte PIC stub using "CALL get_pc_thunk". */
- rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
- output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
- fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
- }
- else
- {
-	  /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx".  */
- fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
- fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
- }
- fprintf (file, "\tjmp\t*%%ecx\n");
- }
- else /* 16-byte -mdynamic-no-pic stub. */
- fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
-
- /* APPLE LOCAL begin AT&T-style stub 4164563 */
- /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
- it needs no stub-binding-helper. */
- if (MACHOPIC_ATT_STUB)
- return;
- /* APPLE LOCAL end AT&T-style stub 4164563 */
-
- /* The "stub_binding_helper" is a fragment that gets executed only
- once, the first time this stub is invoked (then it becomes "dead
- code"). It asks the dynamic linker to set the
- lazy_symbol_pointer to point at the function we want
- (e.g. printf) so that subsequent invocations of this stub go
- directly to that dynamically-linked callee. Other UN*X systems
- use similar stubs, but those are generated by the static linker
- and never appear in assembly files. */
- /* APPLE LOCAL end deep branch prediction pic-base */
- fprintf (file, "%s:\n", binder_name);
-
- /* APPLE LOCAL begin deep branch prediction pic-base * tabify insns */
- if (MACHOPIC_PURE)
- {
- fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
- fprintf (file, "\tpushl\t%%ecx\n");
- }
- else
-    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
-
- fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
- /* APPLE LOCAL end deep branch prediction pic-base * tabify insns */
- /* APPLE LOCAL end use %ecx in stubs 4146993 */
-
- /* APPLE LOCAL begin deep branch prediction pic-base. */
- /* N.B. Keep the correspondence of these
- 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
- old-pic/new-pic/non-pic stubs; altering this will break
- compatibility with existing dylibs. */
- if (MACHOPIC_PURE)
- {
- /* PIC stubs. */
- if (TARGET_DEEP_BRANCH_PREDICTION)
- /* 25-byte PIC stub using "CALL get_pc_thunk". */
- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
- else
-      /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx".  */
- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
- }
- else
- /* 16-byte -mdynamic-no-pic stub. */
- switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
-
- fprintf (file, "%s:\n", lazy_ptr_name);
- fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
- fprintf (file, "\t.long\t%s\n", binder_name);
-}
-/* APPLE LOCAL end deep branch prediction pic-base */
-
-void
-darwin_x86_file_end (void)
-{
- darwin_file_end ();
- ix86_file_end ();
-}
-
-/* APPLE LOCAL begin 4457939 stack alignment mishandled */
-void
-ix86_darwin_init_expanders (void)
-{
- /* <rdar://problem/4471596> stack alignment is not handled properly
-
- Please remove this entire function when addressing this
- Radar. Please be sure to delete the definition of INIT_EXPANDERS
- in i386/darwin.h as well. */
-  /* The Darwin/x86_32 stack pointer will be 16-byte aligned at every
-     CALL, but the frame pointer, when used, will be offset 8 bytes
-     from 16-byte alignment (the size of the return address plus the
-     saved frame pointer).  */
- if (cfun && cfun->emit
- && cfun->emit->regno_pointer_align)
- {
- REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = STACK_BOUNDARY;
- REGNO_POINTER_ALIGN (FRAME_POINTER_REGNUM) = BITS_PER_WORD;
- REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_WORD;
- REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = BITS_PER_WORD;
- }
-}
-/* APPLE LOCAL end 4457939 stack alignment mishandled */
-#endif /* TARGET_MACHO */
-
-/* Order the registers for register allocator. */
-
-void
-x86_order_regs_for_local_alloc (void)
-{
- int pos = 0;
- int i;
-
- /* First allocate the local general purpose registers. */
- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- if (GENERAL_REGNO_P (i) && call_used_regs[i])
- reg_alloc_order [pos++] = i;
-
- /* Global general purpose registers. */
- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- if (GENERAL_REGNO_P (i) && !call_used_regs[i])
- reg_alloc_order [pos++] = i;
-
- /* x87 registers come first in case we are doing FP math
- using them. */
- if (!TARGET_SSE_MATH)
- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
- reg_alloc_order [pos++] = i;
-
- /* SSE registers. */
- for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
- reg_alloc_order [pos++] = i;
- for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
- reg_alloc_order [pos++] = i;
-
- /* x87 registers. */
- if (TARGET_SSE_MATH)
- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
- reg_alloc_order [pos++] = i;
-
- for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
- reg_alloc_order [pos++] = i;
-
-  /* Initialize the rest of the array, as some registers are never
-     allocated at all.  */
- while (pos < FIRST_PSEUDO_REGISTER)
- reg_alloc_order [pos++] = 0;
-}
-
-/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
- struct attribute_spec.handler. */
-static tree
-ix86_handle_struct_attribute (tree *node, tree name,
- tree args ATTRIBUTE_UNUSED,
- int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
-{
- tree *type = NULL;
- if (DECL_P (*node))
- {
- if (TREE_CODE (*node) == TYPE_DECL)
- type = &TREE_TYPE (*node);
- }
- else
- type = node;
-
- if (!(type && (TREE_CODE (*type) == RECORD_TYPE
- || TREE_CODE (*type) == UNION_TYPE)))
- {
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- }
-
- else if ((is_attribute_p ("ms_struct", name)
- && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
- || ((is_attribute_p ("gcc_struct", name)
- && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
- {
- warning (OPT_Wattributes, "%qs incompatible attribute ignored",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
- }
-
- return NULL_TREE;
-}
-
-static bool
-ix86_ms_bitfield_layout_p (tree record_type)
-{
- return (TARGET_MS_BITFIELD_LAYOUT &&
- !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
- || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
-}
-
-/* Returns an expression indicating where the this parameter is
- located on entry to the FUNCTION. */
-
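-/* Editorial summary: for 64-bit this is %rdi, or %rsi when the return
-   value is passed in memory; for 32-bit regparm functions it is %eax
-   (%ecx for fastcall); otherwise it is the first stack slot, 4(%esp),
-   or 8(%esp) when a hidden aggregate-return pointer precedes it.  */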
-static rtx
-x86_this_parameter (tree function)
-{
- tree type = TREE_TYPE (function);
-
- if (TARGET_64BIT)
- {
- int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
- return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
- }
-
- if (ix86_function_regparm (type, function) > 0)
- {
- tree parm;
-
- parm = TYPE_ARG_TYPES (type);
- /* Figure out whether or not the function has a variable number of
- arguments. */
- for (; parm; parm = TREE_CHAIN (parm))
- if (TREE_VALUE (parm) == void_type_node)
- break;
-      /* If not, the this parameter is passed in the first argument
-	 register.  */
- if (parm)
- {
- int regno = 0;
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
- regno = 2;
- return gen_rtx_REG (SImode, regno);
- }
- }
-
- if (aggregate_value_p (TREE_TYPE (type), type))
- return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
- else
- return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
-}
-
-/* Determine whether x86_output_mi_thunk can succeed. */
-
-static bool
-x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
- HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
- HOST_WIDE_INT vcall_offset, tree function)
-{
- /* 64-bit can handle anything. */
- if (TARGET_64BIT)
- return true;
-
- /* For 32-bit, everything's fine if we have one free register. */
- if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
- return true;
-
- /* Need a free register for vcall_offset. */
- if (vcall_offset)
- return false;
-
- /* Need a free register for GOT references. */
- if (flag_pic && !(*targetm.binds_local_p) (function))
- return false;
-
- /* Otherwise ok. */
- return true;
-}
-
-/* Output the assembler code for a thunk function. THUNK_DECL is the
- declaration for the thunk function itself, FUNCTION is the decl for
- the target function. DELTA is an immediate constant offset to be
- added to THIS. If VCALL_OFFSET is nonzero, the word at
- *(*this + vcall_offset) should be added to THIS. */
-
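-/* Editorial example: with DELTA == -4, VCALL_OFFSET == 0, and "this"
-   passed on the stack, the IA-32 path below emits just
-	addl	$-4, 4(%esp)
-	jmp	target
-   assuming a locally-bound target.  */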
-static void
-x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
- tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
- HOST_WIDE_INT vcall_offset, tree function)
-{
- rtx xops[3];
- rtx this = x86_this_parameter (function);
- rtx this_reg, tmp;
-
- /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
- pull it in now and let DELTA benefit. */
- if (REG_P (this))
- this_reg = this;
- else if (vcall_offset)
- {
- /* Put the this parameter into %eax. */
- xops[0] = this;
- xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
- }
- else
- this_reg = NULL_RTX;
-
- /* Adjust the this parameter by a fixed constant. */
- if (delta)
- {
- xops[0] = GEN_INT (delta);
- xops[1] = this_reg ? this_reg : this;
- if (TARGET_64BIT)
- {
- if (!x86_64_general_operand (xops[0], DImode))
- {
- tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
- xops[1] = tmp;
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
- xops[0] = tmp;
- xops[1] = this;
- }
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
- }
- else
- output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
- }
-
- /* Adjust the this parameter by a value stored in the vtable. */
- if (vcall_offset)
- {
- if (TARGET_64BIT)
- tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
- else
- {
- int tmp_regno = 2 /* ECX */;
- if (lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (function))))
- tmp_regno = 0 /* EAX */;
- tmp = gen_rtx_REG (SImode, tmp_regno);
- }
-
- xops[0] = gen_rtx_MEM (Pmode, this_reg);
- xops[1] = tmp;
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
-
- /* Adjust the this parameter. */
- xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
- if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
- {
- rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
- xops[0] = GEN_INT (vcall_offset);
- xops[1] = tmp2;
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
- }
- xops[1] = this_reg;
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
- }
-
- /* If necessary, drop THIS back to its stack slot. */
- if (this_reg && this_reg != this)
- {
- xops[0] = this_reg;
- xops[1] = this;
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
- }
-
- xops[0] = XEXP (DECL_RTL (function), 0);
- if (TARGET_64BIT)
- {
- if (!flag_pic || (*targetm.binds_local_p) (function))
- output_asm_insn ("jmp\t%P0", xops);
- else
- {
- tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
- tmp = gen_rtx_CONST (Pmode, tmp);
- tmp = gen_rtx_MEM (QImode, tmp);
- xops[0] = tmp;
- output_asm_insn ("jmp\t%A0", xops);
- }
- }
- else
- {
- if (!flag_pic || (*targetm.binds_local_p) (function))
- output_asm_insn ("jmp\t%P0", xops);
- else
-#if TARGET_MACHO
- if (TARGET_MACHO)
- {
- rtx sym_ref = XEXP (DECL_RTL (function), 0);
- /* APPLE LOCAL begin axe stubs 5571540 */
- if (darwin_stubs)
- sym_ref = (gen_rtx_SYMBOL_REF
- (Pmode,
- machopic_indirection_name (sym_ref, /*stub_p=*/true)));
- tmp = gen_rtx_MEM (QImode, sym_ref);
- /* APPLE LOCAL end axe stubs 5571540 */
- xops[0] = tmp;
- output_asm_insn ("jmp\t%0", xops);
- }
- else
-#endif /* TARGET_MACHO */
- {
- tmp = gen_rtx_REG (SImode, 2 /* ECX */);
- output_set_got (tmp, NULL_RTX);
-
- xops[1] = tmp;
- output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
- output_asm_insn ("jmp\t{*}%1", xops);
- }
- }
-}
-
-static void
-x86_file_start (void)
-{
- default_file_start ();
-#if TARGET_MACHO
- darwin_file_start ();
-#endif
- if (X86_FILE_START_VERSION_DIRECTIVE)
- fputs ("\t.version\t\"01.01\"\n", asm_out_file);
- if (X86_FILE_START_FLTUSED)
- fputs ("\t.global\t__fltused\n", asm_out_file);
- if (ix86_asm_dialect == ASM_INTEL)
- fputs ("\t.intel_syntax\n", asm_out_file);
-}
-
-int
-x86_field_alignment (tree field, int computed)
-{
- enum machine_mode mode;
- tree type = TREE_TYPE (field);
-
- if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
- return computed;
- /* APPLE LOCAL begin mac68k alignment */
-#if TARGET_MACHO
- if (OPTION_ALIGN_MAC68K)
- {
- if (computed >= 128)
- return computed;
- return MIN (computed, 16);
- }
-#endif
- /* APPLE LOCAL end mac68k alignment */
- mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
- ? get_inner_array_type (type) : type);
- if (mode == DFmode || mode == DCmode
- || GET_MODE_CLASS (mode) == MODE_INT
- || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
- return MIN (32, computed);
- return computed;
-}
-
-/* Output assembler code to FILE to increment profiler label # LABELNO
- for profiling a function entry. */
-void
-x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
-{
- if (TARGET_64BIT)
- if (flag_pic)
- {
-#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
-#endif
- fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
- }
- else
- {
-#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
-#endif
- fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
- }
- else if (flag_pic)
- {
-#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
- LPREFIX, labelno, PROFILE_COUNT_REGISTER);
-#endif
- fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
- }
- else
- {
-#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
- PROFILE_COUNT_REGISTER);
-#endif
- fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
- }
-}
-
-/* We don't have exact information about insn sizes, but we may safely
-   assume that we know about all 1-byte insns and all memory address
-   sizes.  This is enough to eliminate unnecessary padding in 99% of
-   cases.  */
-
-static int
-min_insn_size (rtx insn)
-{
- int l = 0;
-
- if (!INSN_P (insn) || !active_insn_p (insn))
- return 0;
-
-  /* Discard alignments we've emitted, and jump-table insns.  */
- if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
- && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
- return 0;
- if (GET_CODE (insn) == JUMP_INSN
- && (GET_CODE (PATTERN (insn)) == ADDR_VEC
- || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
- return 0;
-
-  /* Important case: calls are always 5 bytes.
-     It is common to have many calls in a row.  */
- if (GET_CODE (insn) == CALL_INSN
- && symbolic_reference_mentioned_p (PATTERN (insn))
- && !SIBLING_CALL_P (insn))
- return 5;
- if (get_attr_length (insn) <= 1)
- return 1;
-
-  /* For normal instructions we may rely on the sizes of addresses
-     and the presence of a symbol to require 4 bytes of encoding.
-     This is not the case for jumps, where references are PC-relative.  */
- if (GET_CODE (insn) != JUMP_INSN)
- {
- l = get_attr_length_address (insn);
- if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
- l = 4;
- }
- if (l)
- return 1+l;
- else
- return 2;
-}
-
-/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
- window. */
-
-static void
-ix86_avoid_jump_misspredicts (void)
-{
- rtx insn, start = get_insns ();
- int nbytes = 0, njumps = 0;
- int isjump = 0;
-
-  /* Look for all minimal intervals of instructions containing 4 jumps.
-     The intervals are bounded by START and INSN.  NBYTES is the total
-     size of the instructions in the interval, including INSN and not
-     including START.  When NBYTES is smaller than 16 bytes, it is
-     possible that the end of START and INSN end up in the same 16-byte
-     window.
-
-     The smallest offset in the window at which INSN can start occurs
-     when START ends at offset 0; the offset of INSN is then
-     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
-     maxskip 17 - NBYTES + sizeof (INSN).
-     */
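-  /* Illustrative arithmetic: if the fourth jump INSN is 2 bytes long and
-     NBYTES = 14, INSN can start as early as offset 14 - 2 = 12 of a
-     window, so all four jumps may share one 16-byte window; the padding
-     of 15 - 14 + 2 = 3 bytes computed below pushes INSN out of it.  */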
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- {
-
- nbytes += min_insn_size (insn);
- if (dump_file)
- fprintf(dump_file, "Insn %i estimated to %i bytes\n",
- INSN_UID (insn), min_insn_size (insn));
- if ((GET_CODE (insn) == JUMP_INSN
- && GET_CODE (PATTERN (insn)) != ADDR_VEC
- && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
- || GET_CODE (insn) == CALL_INSN)
- njumps++;
- else
- continue;
-
- while (njumps > 3)
- {
- start = NEXT_INSN (start);
- if ((GET_CODE (start) == JUMP_INSN
- && GET_CODE (PATTERN (start)) != ADDR_VEC
- && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
- || GET_CODE (start) == CALL_INSN)
- njumps--, isjump = 1;
- else
- isjump = 0;
- nbytes -= min_insn_size (start);
- }
- gcc_assert (njumps >= 0);
- if (dump_file)
- fprintf (dump_file, "Interval %i to %i has %i bytes\n",
- INSN_UID (start), INSN_UID (insn), nbytes);
-
- if (njumps == 3 && isjump && nbytes < 16)
- {
- int padsize = 15 - nbytes + min_insn_size (insn);
-
- if (dump_file)
- fprintf (dump_file, "Padding insn %i by %i bytes!\n",
- INSN_UID (insn), padsize);
- emit_insn_before (gen_align (GEN_INT (padsize)), insn);
- }
- }
-}
-
-/* The AMD Athlon works faster when RET is not the destination of a
-   conditional jump and is not directly preceded by another jump
-   instruction.  We avoid the penalty by inserting a NOP just before the
-   RET instructions in such cases.  */
-static void
-ix86_pad_returns (void)
-{
- edge e;
- edge_iterator ei;
-
- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
- {
- basic_block bb = e->src;
- rtx ret = BB_END (bb);
- rtx prev;
- bool replace = false;
-
- if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
- || !maybe_hot_bb_p (bb))
- continue;
- for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
- if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
- break;
- if (prev && GET_CODE (prev) == CODE_LABEL)
- {
- edge e;
- edge_iterator ei;
-
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (EDGE_FREQUENCY (e) && e->src->index >= 0
- && !(e->flags & EDGE_FALLTHRU))
- replace = true;
- }
- if (!replace)
- {
- prev = prev_active_insn (ret);
- if (prev
- && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
- || GET_CODE (prev) == CALL_INSN))
- replace = true;
-	  /* Empty functions get a branch mispredict even when the jump
-	     destination is not visible to us.  */
- if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
- replace = true;
- }
- if (replace)
- {
- emit_insn_before (gen_return_internal_long (), ret);
- delete_insn (ret);
- }
- }
-}
-
-/* Implement machine-specific optimizations.  We implement padding of
-   returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte
-   window.  */
-static void
-ix86_reorg (void)
-{
- if (TARGET_PAD_RETURNS && optimize && !optimize_size)
- ix86_pad_returns ();
- if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
- ix86_avoid_jump_misspredicts ();
-}
-
-/* Return nonzero when a QImode register that must be represented via a
-   REX prefix is used.  */
-bool
-x86_extended_QIreg_mentioned_p (rtx insn)
-{
- int i;
- extract_insn_cached (insn);
- for (i = 0; i < recog_data.n_operands; i++)
- if (REG_P (recog_data.operand[i])
- && REGNO (recog_data.operand[i]) >= 4)
- return true;
- return false;
-}
-
-/* Return nonzero when P points to a register encoded via a REX prefix.
-   Called via for_each_rtx.  */
-static int
-extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
-{
- unsigned int regno;
- if (!REG_P (*p))
- return 0;
- regno = REGNO (*p);
- return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
-}
-
-/* Return true when INSN mentions register that must be encoded using REX
- prefix. */
-bool
-x86_extended_reg_mentioned_p (rtx insn)
-{
- return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
-}
-
-/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
- optabs would emit if we didn't have TFmode patterns. */
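-
-/* In C terms, the generic path below is roughly (illustrative sketch):
-
-     if ((signed) x >= 0)
-       return (fp) x;               -- value fits the signed range
-     half = (x >> 1) | (x & 1);     -- halve; keep low bit for rounding
-     f = (fp) half;
-     return f + f;                  -- double it back up
-
-   ORing in the low bit makes the final rounding come out correctly.  */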
-
-void
-x86_emit_floatuns (rtx operands[2])
-{
- rtx neglab, donelab, i0, i1, f0, in, out;
- enum machine_mode mode, inmode;
-
- inmode = GET_MODE (operands[1]);
- /* APPLE LOCAL begin 4176531 4424891 */
- mode = GET_MODE (operands[0]);
- if (!TARGET_64BIT && mode == DFmode && !optimize_size)
- {
- switch (inmode)
- {
- case SImode:
- ix86_expand_convert_uns_SI2DF_sse (operands);
- break;
- case DImode:
- ix86_expand_convert_uns_DI2DF_sse (operands);
- break;
- default:
- abort ();
- break;
- }
- return;
- }
- /* APPLE LOCAL end 4176531 4424891 */
-
- out = operands[0];
- in = force_reg (inmode, operands[1]);
- /* APPLE LOCAL begin one line deletion 4424891 */
- /* APPLE LOCAL end one line deletion 4424891 */
- neglab = gen_label_rtx ();
- donelab = gen_label_rtx ();
- i1 = gen_reg_rtx (Pmode);
- f0 = gen_reg_rtx (mode);
-
- emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
-
- emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
- emit_jump_insn (gen_jump (donelab));
- emit_barrier ();
-
- emit_label (neglab);
-
- i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
- i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
- i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
- expand_float (f0, i0, 0);
- emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
-
- emit_label (donelab);
-}
-
-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- with all elements equal to VAR. Return true if successful. */
-
-static bool
-ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx val)
-{
- enum machine_mode smode, wsmode, wvmode;
- rtx x;
-
- switch (mode)
- {
- case V2SImode:
- case V2SFmode:
- if (!mmx_ok)
- return false;
- /* FALLTHRU */
-
- case V2DFmode:
- case V2DImode:
- case V4SFmode:
- case V4SImode:
- val = force_reg (GET_MODE_INNER (mode), val);
- x = gen_rtx_VEC_DUPLICATE (mode, val);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- return true;
-
- case V4HImode:
- if (!mmx_ok)
- return false;
- if (TARGET_SSE || TARGET_3DNOW_A)
- {
- val = gen_lowpart (SImode, val);
- x = gen_rtx_TRUNCATE (HImode, val);
- x = gen_rtx_VEC_DUPLICATE (mode, x);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- return true;
- }
- else
- {
- smode = HImode;
- wsmode = SImode;
- wvmode = V2SImode;
- goto widen;
- }
-
- case V8QImode:
- if (!mmx_ok)
- return false;
- smode = QImode;
- wsmode = HImode;
- wvmode = V4HImode;
- goto widen;
- case V8HImode:
- if (TARGET_SSE2)
- {
- rtx tmp1, tmp2;
- /* Extend HImode to SImode using a paradoxical SUBREG. */
- tmp1 = gen_reg_rtx (SImode);
- emit_move_insn (tmp1, gen_lowpart (SImode, val));
- /* Insert the SImode value as low element of V4SImode vector. */
- tmp2 = gen_reg_rtx (V4SImode);
- tmp1 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
- /* Cast the V4SImode vector back to a V8HImode vector. */
- tmp1 = gen_reg_rtx (V8HImode);
- emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
- /* Duplicate the low short through the whole low SImode word. */
- emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
- /* Cast the V8HImode vector back to a V4SImode vector. */
- tmp2 = gen_reg_rtx (V4SImode);
- emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
- /* Replicate the low element of the V4SImode vector. */
- emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
-	  /* Cast the V4SImode back to V8HImode, and store in target. */
- emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
- return true;
- }
- smode = HImode;
- wsmode = SImode;
- wvmode = V4SImode;
- goto widen;
- case V16QImode:
- if (TARGET_SSE2)
- {
- rtx tmp1, tmp2;
- /* Extend QImode to SImode using a paradoxical SUBREG. */
- tmp1 = gen_reg_rtx (SImode);
- emit_move_insn (tmp1, gen_lowpart (SImode, val));
- /* Insert the SImode value as low element of V4SImode vector. */
- tmp2 = gen_reg_rtx (V4SImode);
- tmp1 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
- /* Cast the V4SImode vector back to a V16QImode vector. */
- tmp1 = gen_reg_rtx (V16QImode);
- emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
- /* Duplicate the low byte through the whole low SImode word. */
- emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
- emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
- /* Cast the V16QImode vector back to a V4SImode vector. */
- tmp2 = gen_reg_rtx (V4SImode);
- emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
- /* Replicate the low element of the V4SImode vector. */
- emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
-	  /* Cast the V4SImode back to V16QImode, and store in target. */
- emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
- return true;
- }
- smode = QImode;
- wsmode = HImode;
- wvmode = V8HImode;
- goto widen;
- widen:
- /* Replicate the value once into the next wider mode and recurse. */
- val = convert_modes (wsmode, smode, val, true);
- x = expand_simple_binop (wsmode, ASHIFT, val,
- GEN_INT (GET_MODE_BITSIZE (smode)),
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
-
- x = gen_reg_rtx (wvmode);
- if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
- gcc_unreachable ();
- emit_move_insn (target, gen_lowpart (mode, x));
- return true;
-
- default:
- return false;
- }
-}
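-
-/* Illustration of the "widen" case above: to splat the QImode value 0xab
-   across V8QImode, the value is first widened to HImode as
-   (0xab << 8) | 0xab = 0xabab, and the function then recurses to splat
-   that value across V4HImode.  */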
-
-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- whose ONE_VAR element is VAR, and other elements are zero. Return true
- if successful. */
-
-static bool
-ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx var, int one_var)
-{
- enum machine_mode vsimode;
- rtx new_target;
- rtx x, tmp;
-
- switch (mode)
- {
- case V2SFmode:
- case V2SImode:
- if (!mmx_ok)
- return false;
- /* FALLTHRU */
-
- case V2DFmode:
- case V2DImode:
- if (one_var != 0)
- return false;
- var = force_reg (GET_MODE_INNER (mode), var);
- x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- return true;
-
- case V4SFmode:
- case V4SImode:
- if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
- new_target = gen_reg_rtx (mode);
- else
- new_target = target;
- var = force_reg (GET_MODE_INNER (mode), var);
- x = gen_rtx_VEC_DUPLICATE (mode, var);
- x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
- if (one_var != 0)
- {
- /* We need to shuffle the value to the correct position, so
- create a new pseudo to store the intermediate result. */
-
- /* With SSE2, we can use the integer shuffle insns. */
- if (mode != V4SFmode && TARGET_SSE2)
- {
- emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
- GEN_INT (1),
- GEN_INT (one_var == 1 ? 0 : 1),
- GEN_INT (one_var == 2 ? 0 : 1),
- GEN_INT (one_var == 3 ? 0 : 1)));
- if (target != new_target)
- emit_move_insn (target, new_target);
- return true;
- }
-
- /* Otherwise convert the intermediate result to V4SFmode and
- use the SSE1 shuffle instructions. */
- if (mode != V4SFmode)
- {
- tmp = gen_reg_rtx (V4SFmode);
- emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
- }
- else
- tmp = new_target;
-
- emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
- GEN_INT (1),
- GEN_INT (one_var == 1 ? 0 : 1),
- GEN_INT (one_var == 2 ? 0+4 : 1+4),
- GEN_INT (one_var == 3 ? 0+4 : 1+4)));
-
- if (mode != V4SFmode)
- emit_move_insn (target, gen_lowpart (V4SImode, tmp));
- else if (tmp != target)
- emit_move_insn (target, tmp);
- }
- else if (target != new_target)
- emit_move_insn (target, new_target);
- return true;
-
- case V8HImode:
- case V16QImode:
- vsimode = V4SImode;
- goto widen;
- case V4HImode:
- case V8QImode:
- if (!mmx_ok)
- return false;
- vsimode = V2SImode;
- goto widen;
- widen:
- if (one_var != 0)
- return false;
-
- /* Zero extend the variable element to SImode and recurse. */
- var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
-
- x = gen_reg_rtx (vsimode);
- if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
- var, one_var))
- gcc_unreachable ();
-
- emit_move_insn (target, gen_lowpart (mode, x));
- return true;
-
- default:
- return false;
- }
-}
-
-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- consisting of the values in VALS. It is known that all elements
- except ONE_VAR are constants. Return true if successful. */
-
-static bool
-ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx vals, int one_var)
-{
- rtx var = XVECEXP (vals, 0, one_var);
- enum machine_mode wmode;
- rtx const_vec, x;
-
- const_vec = copy_rtx (vals);
- XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
- const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
-
- switch (mode)
- {
- case V2DFmode:
- case V2DImode:
- case V2SFmode:
- case V2SImode:
- /* For the two element vectors, it's just as easy to use
- the general case. */
- return false;
-
- case V4SFmode:
- case V4SImode:
- case V8HImode:
- case V4HImode:
- break;
-
- case V16QImode:
- wmode = V8HImode;
- goto widen;
- case V8QImode:
- wmode = V4HImode;
- goto widen;
- widen:
- /* There's no way to set one QImode entry easily. Combine
- the variable value with its adjacent constant value, and
- promote to an HImode set. */
- x = XVECEXP (vals, 0, one_var ^ 1);
- if (one_var & 1)
- {
- var = convert_modes (HImode, QImode, var, true);
- var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- x = GEN_INT (INTVAL (x) & 0xff);
- }
- else
- {
- var = convert_modes (HImode, QImode, var, true);
- x = gen_int_mode (INTVAL (x) << 8, HImode);
- }
- if (x != const0_rtx)
- var = expand_simple_binop (HImode, IOR, var, x, var,
- 1, OPTAB_LIB_WIDEN);
-
- x = gen_reg_rtx (wmode);
- emit_move_insn (x, gen_lowpart (wmode, const_vec));
- ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
-
- emit_move_insn (target, gen_lowpart (mode, x));
- return true;
-
- default:
- return false;
- }
-
- emit_move_insn (target, const_vec);
- ix86_expand_vector_set (mmx_ok, target, var, one_var);
- return true;
-}
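-
-/* Illustration of the QImode promotion above: for V16QImode with
-   ONE_VAR = 5, the variable byte is paired with constant element 4; the
-   combined HImode value is (var << 8) | (vals[4] & 0xff), which is then
-   inserted as V8HImode element 5 >> 1 = 2.  */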
-
-/* A subroutine of ix86_expand_vector_init. Handle the most general case:
- all values variable, and none identical. */
-
-static void
-ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx vals)
-{
- enum machine_mode half_mode = GET_MODE_INNER (mode);
- rtx op0 = NULL, op1 = NULL;
- bool use_vec_concat = false;
-
- switch (mode)
- {
- case V2SFmode:
- case V2SImode:
- if (!mmx_ok && !TARGET_SSE)
- break;
- /* FALLTHRU */
-
- case V2DFmode:
- case V2DImode:
- /* For the two element vectors, we always implement VEC_CONCAT. */
- op0 = XVECEXP (vals, 0, 0);
- op1 = XVECEXP (vals, 0, 1);
- use_vec_concat = true;
- break;
-
- case V4SFmode:
- half_mode = V2SFmode;
- goto half;
- case V4SImode:
- half_mode = V2SImode;
- goto half;
- half:
- {
- rtvec v;
-
- /* For V4SF and V4SI, we implement a concat of two V2 vectors.
- Recurse to load the two halves. */
-
- op0 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
- ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
-
- op1 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
- ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
-
- use_vec_concat = true;
- }
- break;
-
- case V8HImode:
- case V16QImode:
- case V4HImode:
- case V8QImode:
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (use_vec_concat)
- {
- if (!register_operand (op0, half_mode))
- op0 = force_reg (half_mode, op0);
- if (!register_operand (op1, half_mode))
- op1 = force_reg (half_mode, op1);
-
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_rtx_VEC_CONCAT (mode, op0, op1)));
- }
- else
- {
- int i, j, n_elts, n_words, n_elt_per_word;
- enum machine_mode inner_mode;
- rtx words[4], shift;
-
- inner_mode = GET_MODE_INNER (mode);
- n_elts = GET_MODE_NUNITS (mode);
- n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
- n_elt_per_word = n_elts / n_words;
- shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
-
- for (i = 0; i < n_words; ++i)
- {
- rtx word = NULL_RTX;
-
- for (j = 0; j < n_elt_per_word; ++j)
- {
- rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
- elt = convert_modes (word_mode, inner_mode, elt, true);
-
- if (j == 0)
- word = elt;
- else
- {
- word = expand_simple_binop (word_mode, ASHIFT, word, shift,
- word, 1, OPTAB_LIB_WIDEN);
- word = expand_simple_binop (word_mode, IOR, word, elt,
- word, 1, OPTAB_LIB_WIDEN);
- }
- }
-
- words[i] = word;
- }
-
- if (n_words == 1)
- emit_move_insn (target, gen_lowpart (mode, words[0]));
- else if (n_words == 2)
- {
- rtx tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
- emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
- emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
- emit_move_insn (target, tmp);
- }
- else if (n_words == 4)
- {
- rtx tmp = gen_reg_rtx (V4SImode);
- vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
- ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
- emit_move_insn (target, gen_lowpart (mode, tmp));
- }
- else
- gcc_unreachable ();
- }
-}
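-
-/* In the word-building loop above, elements are packed little-endian
-   within each word; e.g. for V8HImode on a 32-bit target each SImode
-   word becomes (vals[2*i + 1] << 16) | vals[2*i].  */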
-
-/* Initialize vector TARGET via VALS. Suppress the use of MMX
- instructions unless MMX_OK is true. */
-
-void
-ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
-{
- enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
- int n_var = 0, one_var = -1;
- bool all_same = true, all_const_zero = true;
- int i;
- rtx x;
-
- for (i = 0; i < n_elts; ++i)
- {
- x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
- n_var++, one_var = i;
- else if (x != CONST0_RTX (inner_mode))
- all_const_zero = false;
- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
- all_same = false;
- }
-
- /* Constants are best loaded from the constant pool. */
- if (n_var == 0)
- {
- emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
- return;
- }
-
- /* If all values are identical, broadcast the value. */
- if (all_same
- && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
- XVECEXP (vals, 0, 0)))
- return;
-
- /* Values where only one field is non-constant are best loaded from
- the pool and overwritten via move later. */
- if (n_var == 1)
- {
- if (all_const_zero
- && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
- XVECEXP (vals, 0, one_var),
- one_var))
- return;
-
- if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
- return;
- }
-
- ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
-}
-
-void
-ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
-{
- enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
- bool use_vec_merge = false;
- rtx tmp;
-
- switch (mode)
- {
- case V2SFmode:
- case V2SImode:
- if (mmx_ok)
- {
- tmp = gen_reg_rtx (GET_MODE_INNER (mode));
- ix86_expand_vector_extract (true, tmp, target, 1 - elt);
- if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- else
- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
- emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
- return;
- }
- break;
-
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- case V2DImode:
- use_vec_merge = TARGET_SSE4_1;
- if (use_vec_merge)
- break;
-
- case V2DFmode:
- /* APPLE LOCAL end 5612787 mainline sse4 */
- {
- rtx op0, op1;
-
- /* For the two element vectors, we implement a VEC_CONCAT with
- the extraction of the other element. */
-
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
- tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
-
- if (elt == 0)
- op0 = val, op1 = tmp;
- else
- op0 = tmp, op1 = val;
-
- tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
- emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
- }
- return;
-
- case V4SFmode:
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- use_vec_merge = TARGET_SSE4_1;
- if (use_vec_merge)
- break;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- switch (elt)
- {
- case 0:
- use_vec_merge = true;
- break;
-
- case 1:
- /* tmp = target = A B C D */
- tmp = copy_to_reg (target);
- /* target = A A B B */
- emit_insn (gen_sse_unpcklps (target, target, target));
- /* target = X A B B */
- ix86_expand_vector_set (false, target, val, 0);
- /* target = A X C D */
- emit_insn (gen_sse_shufps_1 (target, target, tmp,
- GEN_INT (1), GEN_INT (0),
- GEN_INT (2+4), GEN_INT (3+4)));
- return;
-
- case 2:
- /* tmp = target = A B C D */
- tmp = copy_to_reg (target);
- /* tmp = X B C D */
- ix86_expand_vector_set (false, tmp, val, 0);
- /* target = A B X D */
- emit_insn (gen_sse_shufps_1 (target, target, tmp,
- GEN_INT (0), GEN_INT (1),
- GEN_INT (0+4), GEN_INT (3+4)));
- return;
-
- case 3:
- /* tmp = target = A B C D */
- tmp = copy_to_reg (target);
- /* tmp = X B C D */
- ix86_expand_vector_set (false, tmp, val, 0);
-	  /* target = A B C X */
- emit_insn (gen_sse_shufps_1 (target, target, tmp,
- GEN_INT (0), GEN_INT (1),
- GEN_INT (2+4), GEN_INT (0+4)));
- return;
-
- default:
- gcc_unreachable ();
- }
- break;
-
- case V4SImode:
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- use_vec_merge = TARGET_SSE4_1;
- if (use_vec_merge)
- break;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- /* Element 0 handled by vec_merge below. */
- if (elt == 0)
- {
- use_vec_merge = true;
- break;
- }
-
- if (TARGET_SSE2)
- {
- /* With SSE2, use integer shuffles to swap element 0 and ELT,
- store into element 0, then shuffle them back. */
-
- rtx order[4];
-
- order[0] = GEN_INT (elt);
- order[1] = const1_rtx;
- order[2] = const2_rtx;
- order[3] = GEN_INT (3);
- order[elt] = const0_rtx;
-
- emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
- order[1], order[2], order[3]));
-
- ix86_expand_vector_set (false, target, val, 0);
-
- emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
- order[1], order[2], order[3]));
- }
- else
- {
- /* For SSE1, we have to reuse the V4SF code. */
- ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
- gen_lowpart (SFmode, val), elt);
- }
- return;
-
- case V8HImode:
- use_vec_merge = TARGET_SSE2;
- break;
- case V4HImode:
- use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
- break;
-
- case V16QImode:
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- use_vec_merge = TARGET_SSE4_1;
- break;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- case V8QImode:
- default:
- break;
- }
-
- if (use_vec_merge)
- {
- tmp = gen_rtx_VEC_DUPLICATE (mode, val);
- tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
- emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
- }
- else
- {
- rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
-
- emit_move_insn (mem, target);
-
- tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
- emit_move_insn (tmp, val);
-
- emit_move_insn (target, mem);
- }
-}
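-
-/* The vec_merge form above takes lane ELT from the duplicated value and
-   all other lanes from TARGET; e.g. for V4SImode with ELT = 2 the merge
-   mask is 1 << 2 = 4 (binary 0100).  */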
-
-void
-ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
-{
- enum machine_mode mode = GET_MODE (vec);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
- bool use_vec_extr = false;
- rtx tmp;
-
- switch (mode)
- {
- case V2SImode:
- case V2SFmode:
- if (!mmx_ok)
- break;
- /* FALLTHRU */
-
- case V2DFmode:
- case V2DImode:
- use_vec_extr = true;
- break;
-
- case V4SFmode:
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- use_vec_extr = TARGET_SSE4_1;
- if (use_vec_extr)
- break;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- switch (elt)
- {
- case 0:
- tmp = vec;
- break;
-
- case 1:
- case 3:
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
- GEN_INT (elt), GEN_INT (elt),
- GEN_INT (elt+4), GEN_INT (elt+4)));
- break;
-
- case 2:
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_sse_unpckhps (tmp, vec, vec));
- break;
-
- default:
- gcc_unreachable ();
- }
- vec = tmp;
- use_vec_extr = true;
- elt = 0;
- break;
-
- case V4SImode:
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- use_vec_extr = TARGET_SSE4_1;
- if (use_vec_extr)
- break;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- if (TARGET_SSE2)
- {
- switch (elt)
- {
- case 0:
- tmp = vec;
- break;
-
- case 1:
- case 3:
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_sse2_pshufd_1 (tmp, vec,
- GEN_INT (elt), GEN_INT (elt),
- GEN_INT (elt), GEN_INT (elt)));
- break;
-
- case 2:
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
- break;
-
- default:
- gcc_unreachable ();
- }
- vec = tmp;
- use_vec_extr = true;
- elt = 0;
- }
- else
- {
- /* For SSE1, we have to reuse the V4SF code. */
- ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
- gen_lowpart (V4SFmode, vec), elt);
- return;
- }
- break;
-
- case V8HImode:
- use_vec_extr = TARGET_SSE2;
- break;
- case V4HImode:
- use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
- break;
-
- case V16QImode:
- /* APPLE LOCAL begin 5612787 mainline sse4 */
- use_vec_extr = TARGET_SSE4_1;
- break;
- /* APPLE LOCAL end 5612787 mainline sse4 */
- case V8QImode:
- /* ??? Could extract the appropriate HImode element and shift. */
- default:
- break;
- }
-
- if (use_vec_extr)
- {
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
- tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
-
- /* Let the rtl optimizers know about the zero extension performed. */
- /* APPLE LOCAL 5612787 mainline sse4 */
- if (inner_mode == QImode || inner_mode == HImode)
- {
- tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
- target = gen_lowpart (SImode, target);
- }
-
- emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
- }
- else
- {
- rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
-
- emit_move_insn (mem, vec);
-
- tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
- emit_move_insn (target, tmp);
- }
-}
-
-/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
- pattern to reduce; DEST is the destination; IN is the input vector. */
-
-void
-ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
-{
- rtx tmp1, tmp2, tmp3;
-
- tmp1 = gen_reg_rtx (V4SFmode);
- tmp2 = gen_reg_rtx (V4SFmode);
- tmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_sse_movhlps (tmp1, in, in));
- emit_insn (fn (tmp2, tmp1, in));
-
- emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
- GEN_INT (1), GEN_INT (1),
- GEN_INT (1+4), GEN_INT (1+4)));
- emit_insn (fn (dest, tmp2, tmp3));
-}
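-
-/* For example, with FN generating additions and IN = {a, b, c, d}:
-   movhlps yields TMP1 = {c, d, c, d}; the first FN yields
-   TMP2 = {a+c, b+d, ...}; the shufps broadcasts element 1 so that
-   TMP3 = {b+d, ...}; and the final FN leaves a+b+c+d in element 0
-   of DEST.  */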
-
-/* Target hook for scalar_mode_supported_p. */
-static bool
-ix86_scalar_mode_supported_p (enum machine_mode mode)
-{
- if (DECIMAL_FLOAT_MODE_P (mode))
- return true;
- else
- return default_scalar_mode_supported_p (mode);
-}
-
-/* Implements target hook vector_mode_supported_p. */
-static bool
-ix86_vector_mode_supported_p (enum machine_mode mode)
-{
- if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- return true;
- if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
- return true;
- if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
- return true;
- if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
- return true;
- return false;
-}
-
-/* Worker function for TARGET_MD_ASM_CLOBBERS.
-
- We do this in the new i386 backend to maintain source compatibility
- with the old cc0-based compiler. */
-
-static tree
-ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
- tree inputs ATTRIBUTE_UNUSED,
- tree clobbers)
-{
- clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
- clobbers);
- clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
- clobbers);
- clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
- clobbers);
- return clobbers;
-}
-
-/* Return true if this goes in large data/bss. */
-
-static bool
-ix86_in_large_data_p (tree exp)
-{
- if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
- return false;
-
- /* Functions are never large data. */
- if (TREE_CODE (exp) == FUNCTION_DECL)
- return false;
-
- if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
- {
- const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
- if (strcmp (section, ".ldata") == 0
- || strcmp (section, ".lbss") == 0)
- return true;
- return false;
- }
- else
- {
- HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
-
- /* If this is an incomplete type with size 0, then we can't put it
- in data because it might be too big when completed. */
- if (!size || size > ix86_section_threshold)
- return true;
- }
-
- return false;
-}
-
-static void
-ix86_encode_section_info (tree decl, rtx rtl, int first)
-{
- default_encode_section_info (decl, rtl, first);
-
- if (TREE_CODE (decl) == VAR_DECL
- && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
- && ix86_in_large_data_p (decl))
- SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
-}
-
-/* Worker function for REVERSE_CONDITION. */
-
-enum rtx_code
-ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
-{
- return (mode != CCFPmode && mode != CCFPUmode
- ? reverse_condition (code)
- : reverse_condition_maybe_unordered (code));
-}
-
-/* Output code to perform an x87 FP register move, from OPERANDS[1]
- to OPERANDS[0]. */
-
-const char *
-output_387_reg_move (rtx insn, rtx *operands)
-{
- if (REG_P (operands[1])
- && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- {
- if (REGNO (operands[0]) == FIRST_STACK_REG)
- return output_387_ffreep (operands, 0);
- return "fstp\t%y0";
- }
- if (STACK_TOP_P (operands[0]))
- return "fld%z1\t%y1";
- return "fst\t%y0";
-}
-
-/* Output code to perform a conditional jump to LABEL, if the C2 flag in
-   the FP status register is set.  */
-
-void
-ix86_emit_fp_unordered_jump (rtx label)
-{
- rtx reg = gen_reg_rtx (HImode);
- rtx temp;
-
- emit_insn (gen_x86_fnstsw_1 (reg));
-
- if (TARGET_USE_SAHF)
- {
- emit_insn (gen_x86_sahf_1 (reg));
-
- temp = gen_rtx_REG (CCmode, FLAGS_REG);
- temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
- }
- else
- {
- emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
-
- temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
- }
-
- temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx);
- temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
- emit_jump_insn (temp);
-}
-
-/* Output code to perform a log1p XFmode calculation. */
-
-void ix86_emit_i387_log1p (rtx op0, rtx op1)
-{
- rtx label1 = gen_label_rtx ();
- rtx label2 = gen_label_rtx ();
-
- rtx tmp = gen_reg_rtx (XFmode);
- rtx tmp2 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_absxf2 (tmp, op1));
- emit_insn (gen_cmpxf (tmp,
- CONST_DOUBLE_FROM_REAL_VALUE (
- REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
- XFmode)));
- emit_jump_insn (gen_bge (label1));
-
- emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
- emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
- emit_jump (label2);
-
- emit_label (label1);
- emit_move_insn (tmp, CONST1_RTX (XFmode));
- emit_insn (gen_addxf3 (tmp, op1, tmp));
- emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
- emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
-
- emit_label (label2);
-}
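-
-/* The threshold above is 1 - sqrt(2)/2 ~= 0.2929, the bound within which
-   the x87 fyl2xp1 instruction is specified to be accurate: below it we
-   compute ln(2) * log2 (x + 1) = log1p (x) directly; above it we fall
-   back to fyl2x applied to 1 + x.  */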
-
-/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
-
-static void
-i386_solaris_elf_named_section (const char *name, unsigned int flags,
- tree decl)
-{
- /* With Binutils 2.15, the "@unwind" marker must be specified on
- every occurrence of the ".eh_frame" section, not just the first
- one. */
- if (TARGET_64BIT
- && strcmp (name, ".eh_frame") == 0)
- {
- fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
- flags & SECTION_WRITE ? "aw" : "a");
- return;
- }
- default_elf_asm_named_section (name, flags, decl);
-}
-
-/* APPLE LOCAL begin regparmandstackparm */
-
-/* Mark this fndecl as using the regparmandstackparm calling convention. */
-static void
-ix86_make_regparmandstackparmee (tree *pt)
-{
- decl_attributes (pt,
- tree_cons (get_identifier ("regparmandstackparmee"),
- NULL_TREE, TYPE_ATTRIBUTES (*pt)), 0);
-}
-
-/* Lookup fndecls marked 'regparmandstackparm', retrieve their $3SSE equivalents. */
-static splay_tree ix86_darwin_regparmandstackparm_st;
-/* Cache for regparmandstackparm fntypes. */
-static splay_tree ix86_darwin_fntype_st;
-
-/* Append "$3SSE" to an ID, returning a new IDENTIFIER_NODE. */
-static tree
-ix86_darwin_regparmandstackparm_mangle_name (tree id)
-{
- static const char *mangle_suffix = "$3SSE";
- unsigned int mangle_length = strlen (mangle_suffix);
- const char *name;
- unsigned int orig_length;
- char *buf;
-
- if (!id)
- return NULL_TREE;
-
- name = IDENTIFIER_POINTER (id);
- orig_length = strlen (name);
- buf = alloca (orig_length + mangle_length + 1);
-
- strcpy (buf, name);
- strcat (buf, mangle_suffix);
- return get_identifier (buf); /* Expecting get_identifier to reallocate the string. */
-}
-
-/* Given the "normal" TRAD_FNDECL marked with 'regparmandstackparm',
- return a duplicate fndecl marked 'regparmandstackparmee' (note trailing
- 'ee'). Enter them as a pair in the splay tree ST, if non-null;
- looking up the TRAD_FNDECL will return the new one. */
-static tree
-ix86_darwin_regparmandstackparm_dup_fndecl (tree trad_fndecl, splay_tree st)
-{
- tree fntype;
- tree new_fndecl;
-
- fntype = TREE_TYPE (trad_fndecl);
-
- /* NEW_FNDECL will be compiled with the XMM-based calling
- convention, and TRAD_FNDECL (the original) will be compiled with
- the traditional stack-based calling convention. */
- new_fndecl = copy_node (trad_fndecl);
- DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0;
- allocate_struct_function (new_fndecl);
- DECL_STRUCT_FUNCTION (new_fndecl)->function_end_locus
- = DECL_STRUCT_FUNCTION (trad_fndecl)->function_end_locus;
- DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl =
- DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl;
- DECL_RESULT (new_fndecl) = copy_node (DECL_RESULT (trad_fndecl));
- DECL_CONTEXT (DECL_RESULT (new_fndecl)) = new_fndecl;
- SET_DECL_ASSEMBLER_NAME (new_fndecl, 0);
- DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl));
- TYPE_ATTRIBUTES (TREE_TYPE (new_fndecl))
- = copy_list (TYPE_ATTRIBUTES (TREE_TYPE (trad_fndecl)));
- ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl));
- /* Kludge: block copied from tree-inline.c(save_body). Should
- be refactored into a common shareable routine. */
- {
- tree *parg;
-
- for (parg = &DECL_ARGUMENTS (new_fndecl);
- *parg;
- parg = &TREE_CHAIN (*parg))
- {
- tree new = copy_node (*parg);
-
- lang_hooks.dup_lang_specific_decl (new);
- DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (*parg);
- DECL_CONTEXT (new) = new_fndecl;
- /* Note: it may be possible to move the original parameters
- with the function body, making this splay tree
- unnecessary. */
- if (st)
- splay_tree_insert (st, (splay_tree_key) *parg, (splay_tree_value) new);
- TREE_CHAIN (new) = TREE_CHAIN (*parg);
- *parg = new;
- }
-
- if (DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl)
- {
- tree old = DECL_STRUCT_FUNCTION (trad_fndecl)->static_chain_decl;
- tree new = copy_node (old);
-
- lang_hooks.dup_lang_specific_decl (new);
- DECL_ABSTRACT_ORIGIN (new) = DECL_ORIGIN (old);
- DECL_CONTEXT (new) = new_fndecl;
- if (st)
- splay_tree_insert (st, (splay_tree_key) old, (splay_tree_value) new);
- TREE_CHAIN (new) = TREE_CHAIN (old);
- DECL_STRUCT_FUNCTION (new_fndecl)->static_chain_decl = new;
- }
-
- if (st)
- splay_tree_insert (st, (splay_tree_key) DECL_RESULT (trad_fndecl),
- (splay_tree_value) DECL_RESULT (new_fndecl));
- }
-#if 0
- /* Testing Kludge: If TREE_READONLY is set, cgen can and
- occasionally will delete "pure" (no side-effect) calls to a
- library function. Cleared here to preclude this when
- test-building libraries. */
- TREE_READONLY (new_fndecl) = false;
-#endif
-
- return new_fndecl;
-}
-
-/* FNDECL has no body, but the user has marked it as a regparmandstackparm
-   item.  Create a corresponding regparmandstackparm decl for it, and
-   arrange for calls to be redirected to the regparmandstackparm
-   version.  */
-static tree
-ix86_darwin_regparmandstackparm_extern_decl (tree trad_fndecl)
-{
- tree new_fndecl;
-
- /* new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, (splay_tree)0); */
- new_fndecl = copy_node (trad_fndecl);
- DECL_NAME (new_fndecl) = ix86_darwin_regparmandstackparm_mangle_name (DECL_NAME (trad_fndecl));
- DECL_STRUCT_FUNCTION (new_fndecl) = (struct function *)0;
- SET_DECL_ASSEMBLER_NAME (new_fndecl, 0);
- ix86_make_regparmandstackparmee (&TREE_TYPE (new_fndecl));
- cgraph_finalize_function (new_fndecl, /* nested = */ true);
- if (!ix86_darwin_regparmandstackparm_st)
- ix86_darwin_regparmandstackparm_st
- = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
- splay_tree_insert (ix86_darwin_regparmandstackparm_st,
- (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl);
- return new_fndecl;
-}
-
-/* Invoked after all functions have been seen and digested, but before
- any inlining decisions have been made. Walk the callgraph, seeking
- calls to functions that have regparmandstackparm variants. Rewrite the
- calls, directing them to the new 'regparmandstackparmee' versions. */
-void
-ix86_darwin_redirect_calls(void)
-{
- struct cgraph_node *fastcall_node, *node;
- struct cgraph_edge *edge, *next_edge;
- tree addr, fastcall_decl, orig_fntype;
- splay_tree_node call_stn, type_stn;
-
- if (!flag_unit_at_a_time)
- return;
-
- if (!ix86_darwin_fntype_st)
- ix86_darwin_fntype_st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
-
- if (!ix86_darwin_regparmandstackparm_st)
- ix86_darwin_regparmandstackparm_st
- = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
-
- /* Extern decls marked "regparmandstackparm" beget regparmandstackparmee
- decls. */
- for (node = cgraph_nodes; node; node = node->next)
- if (!DECL_SAVED_TREE (node->decl)
- && lookup_attribute ("regparmandstackparm",
- TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
- && !lookup_attribute ("regparmandstackparmee",
- TYPE_ATTRIBUTES (TREE_TYPE (node->decl))))
- {
- fastcall_decl = ix86_darwin_regparmandstackparm_extern_decl (node->decl);
- splay_tree_insert (ix86_darwin_regparmandstackparm_st,
- (splay_tree_key) node->decl,
- (splay_tree_value) fastcall_decl);
- }
-
- /* Walk the callgraph, rewriting calls as we go. */
- for (node = cgraph_nodes; node; node = node->next)
- {
- call_stn = splay_tree_lookup (ix86_darwin_regparmandstackparm_st,
- (splay_tree_key)node->decl);
- /* If this function was in our splay-tree, we previously created
- a regparmandstackparm version of it. */
- if (call_stn)
- {
- fastcall_decl = (tree)call_stn->value;
- fastcall_node = cgraph_node (fastcall_decl);
- /* Redirect all calls to this fn to the regparmandstackparm
- version. */
- for (edge = next_edge = node->callers ; edge ; edge = next_edge)
- {
- tree call, stmt;
- next_edge = next_edge->next_caller;
- cgraph_redirect_edge_callee (edge, fastcall_node);
- /* APPLE LOCAL */
- /* MERGE FIXME call_expr -> call_stmt */
- stmt = edge->call_stmt;
- call = get_call_expr_in (stmt);
- addr = TREE_OPERAND (call, 0);
- TREE_OPERAND (addr, 0) = fastcall_decl;
- orig_fntype = TREE_TYPE (addr);
- /* Likewise, revise the TYPE of the ADDR node between
- the CALL_EXPR and the FNDECL. This type determines
- the parameters and calling convention applied to this
- CALL_EXPR. */
- type_stn = splay_tree_lookup (ix86_darwin_fntype_st, (splay_tree_value)orig_fntype);
- if (type_stn)
- TREE_TYPE (addr) = (tree)type_stn->value;
- else
- {
- ix86_make_regparmandstackparmee (&TREE_TYPE (addr));
- splay_tree_insert (ix86_darwin_fntype_st,
- (splay_tree_key)orig_fntype,
- (splay_tree_value)TREE_TYPE (addr));
- }
- }
- }
- }
-}
-
-/* Information necessary to re-context a function body. */
-typedef struct {
- tree old_context;
- tree new_context;
- splay_tree decl_map;
-} recontext_data;
-
-/* Visit every node of a function body; if it points at the
- OLD_CONTEXT, re-direct it to the NEW_CONTEXT. Invoked via
- walk_tree. DECL_MAP is a splay tree that maps the original
- parameters to new ones. */
-static tree
-ix86_darwin_re_context_1 (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
-{
- tree t;
- recontext_data *rcd;
- enum tree_code_class class;
- splay_tree_node n;
-
- if (!tp)
- return NULL_TREE;
-
- t = *tp;
- if (!t)
- return NULL_TREE;
-
- rcd = (recontext_data *)data;
- n = splay_tree_lookup (rcd->decl_map, (splay_tree_key) t);
- if (n)
- {
- *tp = (tree)n->value;
- return NULL_TREE;
- }
-
- class = TREE_CODE_CLASS (TREE_CODE (t));
- if (class != tcc_declaration)
- return NULL_TREE;
-
- if (DECL_CONTEXT (t) == rcd->old_context)
- DECL_CONTEXT (t) = rcd->new_context;
-
- return NULL_TREE;
-}
-
-/* Walk a function body, updating every pointer to OLD_CONTEXT to
- NEW_CONTEXT. TP is the top of the function body, and ST is a splay
- tree of replacements for the parameters. */
-static tree
-ix86_darwin_re_context (tree *tp, tree old_context, tree new_context, splay_tree st)
-{
- recontext_data rcd;
- tree ret;
-
- rcd.old_context = old_context;
- rcd.new_context = new_context;
- rcd.decl_map = st;
-
- ret = walk_tree (tp, ix86_darwin_re_context_1,
- (void *)&rcd, (struct pointer_set_t *)0);
- return ret;
-}
-
-/* Given TRAD_FNDECL, create a regparmandstackparm variant and hang the
- DECL_SAVED_TREE body there. Create a new, one-statement body for
- TRAD_FNDECL that calls the new one. If the return types are
- compatible (e.g. non-FP), the call can usually be sibcalled. The
- inliner will often copy the body from NEW_FNDECL into TRAD_FNDECL,
- and we do nothing to prevent this. */
-static void
-ix86_darwin_regparmandstackparm_wrapper (tree trad_fndecl)
-{
- tree new_fndecl;
- splay_tree st;
- tree bind, block, call, clone_parm, modify, parmlist, rdecl, rtn, stmt_list, type;
- tree_stmt_iterator tsi;
-
- st = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
- new_fndecl = ix86_darwin_regparmandstackparm_dup_fndecl (trad_fndecl, st);
-
- for (parmlist = NULL, clone_parm = DECL_ARGUMENTS (trad_fndecl);
- clone_parm;
- clone_parm = TREE_CHAIN (clone_parm))
- {
- gcc_assert (clone_parm);
- DECL_ABSTRACT_ORIGIN (clone_parm) = NULL;
- parmlist = tree_cons (NULL, clone_parm, parmlist);
- }
-
- /* We built this list backwards; fix now. */
- parmlist = nreverse (parmlist);
- type = TREE_TYPE (TREE_TYPE (trad_fndecl));
- call = build_function_call (new_fndecl, parmlist);
- TREE_TYPE (call) = type;
- if (type == void_type_node)
- rtn = call;
- else if (0 && ix86_return_in_memory (type))
- {
- /* Return without a RESULT_DECL: RETURN_EXPR (CALL). */
- rtn = make_node (RETURN_EXPR);
- TREE_OPERAND (rtn, 0) = call;
- TREE_TYPE (rtn) = type;
- }
- else /* RETURN_EXPR(MODIFY(RESULT_DECL, CALL)). */
- {
- rdecl = make_node (RESULT_DECL);
- TREE_TYPE (rdecl) = type;
- DECL_MODE (rdecl) = TYPE_MODE (type);
- DECL_RESULT (trad_fndecl) = rdecl;
- DECL_CONTEXT (rdecl) = trad_fndecl;
- modify = build_modify_expr (rdecl, NOP_EXPR, call);
- TREE_TYPE (modify) = type;
- rtn = make_node (RETURN_EXPR);
- TREE_OPERAND (rtn, 0) = modify;
- TREE_TYPE (rtn) = type;
- }
- stmt_list = alloc_stmt_list ();
- tsi = tsi_start (stmt_list);
- tsi_link_after (&tsi, rtn, TSI_NEW_STMT);
-
-  /* This wrapper consists of "return <my_name>$3SSE (<my_arguments>);"
-     and thus has no local variables.  */
- block = make_node (BLOCK);
- TREE_USED (block) = true;
- bind = make_node (BIND_EXPR);
- BIND_EXPR_BLOCK (bind) = block;
- BIND_EXPR_BODY (bind) = stmt_list;
- TREE_TYPE (bind) = void_type_node;
- TREE_SIDE_EFFECTS (bind) = true;
-
- DECL_SAVED_TREE (trad_fndecl) = bind;
-
- /* DECL_ABSTRACT_ORIGIN (new_fndecl) = NULL; *//* ? */
-
- ix86_darwin_re_context (&new_fndecl, trad_fndecl, new_fndecl, st);
- ix86_darwin_re_context (&DECL_SAVED_TREE (new_fndecl), trad_fndecl, new_fndecl, st);
- splay_tree_delete (st);
- gimplify_function_tree (new_fndecl);
- cgraph_finalize_function (new_fndecl, /* nested = */ true);
- gimplify_function_tree (trad_fndecl);
- if (!ix86_darwin_regparmandstackparm_st)
- ix86_darwin_regparmandstackparm_st
- = splay_tree_new (splay_tree_compare_pointers, NULL, NULL);
- splay_tree_insert (ix86_darwin_regparmandstackparm_st,
- (splay_tree_key) trad_fndecl, (splay_tree_value) new_fndecl);
-}
-
-/* Entry point into the regparmandstackparm machinery.  FNDECL might be
-   marked 'regparmandstackparm'; if it is, create the fast version, etc.  */
-void
-ix86_darwin_handle_regparmandstackparm (tree fndecl)
-{
- static unsigned int already_running = 0;
-
- /* We don't support variable-argument functions yet. */
- if (!fndecl || already_running)
- return;
-
- already_running++;
-
- if (lookup_attribute ("regparmandstackparm", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
- && !lookup_attribute ("regparmandstackparmee", TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
- {
- if (DECL_STRUCT_FUNCTION (fndecl) && DECL_STRUCT_FUNCTION (fndecl)->stdarg)
- error ("regparmandstackparm is incompatible with varargs");
- else if (DECL_SAVED_TREE (fndecl))
- ix86_darwin_regparmandstackparm_wrapper (fndecl);
- }
-
- already_running--;
-}
-/* APPLE LOCAL end regparmandstackparm */
-
-/* APPLE LOCAL begin CW asm blocks */
-#include <ctype.h>
-#include "config/asm.h"
-
-/* Additional register names accepted for inline assembly that would
-   otherwise not be recognized as registers.  This table must be sorted
-   for bsearch.  */
-static const char *iasm_additional_names[] = {
- "AH", "AL", "AX", "BH", "BL", "BP", "BX", "CH", "CL", "CX", "DH",
- "DI", "DL", "DX", "EAX", "EBP", "EBX", "ECX", "EDI", "EDX", "ESI",
- "ESP", "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "R10",
- "R11", "R12", "R13", "R14", "R15", "R8", "R9", "RAX", "RBP", "RBX",
- "RCX", "RDI", "RDX", "RSI", "RSP", "SI", "SP", "ST", "ST(1)", "ST(2)",
- "ST(3)", "ST(4)", "ST(5)", "ST(6)", "ST(7)", "XMM0", "XMM1", "XMM10",
- "XMM11", "XMM12", "XMM13", "XMM14", "XMM15", "XMM2", "XMM3", "XMM4",
- "XMM5", "XMM6", "XMM7", "XMM8", "XMM9" };
-
-/* Comparison function for bsearch to find additional register names. */
-static int
-iasm_reg_comp (const void *a, const void *b)
-{
- char *const*x = a;
- char *const*y = b;
- int c = strcasecmp (*x, *y);
- return c;
-}
-
-/* Translate some register names seen in CW asm into GCC standard
- forms. */
-
-const char *
-i386_iasm_register_name (const char *regname, char *buf)
-{
- const char **r;
-
- /* If we can find the named register, return it. */
- if (decode_reg_name (regname) >= 0)
- {
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- return regname;
- sprintf (buf, "%%%s", regname);
- return buf;
- }
-
-  /* If we can find a lower-case version of any register in
-     iasm_additional_names, return it.  */
- r = bsearch (&regname, iasm_additional_names,
- sizeof (iasm_additional_names) / sizeof (iasm_additional_names[0]),
- sizeof (iasm_additional_names[0]), iasm_reg_comp);
- if (r)
- {
- char *p;
- const char *q;
- q = regname = *r;
- p = buf;
- if (ASSEMBLER_DIALECT != ASM_INTEL)
- *p++ = '%';
- regname = p;
- while ((*p++ = tolower (*q++)))
- ;
- if (decode_reg_name (regname) >= 0)
- return buf;
- }
-
- return NULL;
-}
-
-/* Return true iff the opcode wants memory to be stable. We arrange
- for a memory clobber in these instances. */
-bool
-iasm_memory_clobber (const char *ARG_UNUSED (opcode))
-{
- return true;
-}
-
-/* Return true iff the operands need swapping. */
-
-bool
-iasm_x86_needs_swapping (const char *opcode)
-{
- /* Don't swap if output format is the same as input format. */
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- return false;
-
- /* These don't need swapping. */
- if (strcasecmp (opcode, "bound") == 0)
- return false;
- if (strcasecmp (opcode, "invlpga") == 0)
- return false;
- if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1))
- return false;
-
- return true;
-}
-
-/* Swap operands that were given in MS-style asm ordering when the
-   output is in AT&T syntax.  */
-
-static tree
-iasm_x86_swap_operands (const char *opcode, tree args)
-{
- int noperands;
-
- if (iasm_x86_needs_swapping (opcode) == false)
- return args;
-
-#if 0
- /* GAS also checks the type of the arguments to determine if they
- need swapping. */
- if ((argtype[0]&Imm) && (argtype[1]&Imm))
- return args;
-#endif
- noperands = list_length (args);
- if (noperands == 2 || noperands == 3)
- {
- /* Swap first and last (1 and 2 or 1 and 3). */
- return nreverse (args);
- }
- return args;
-}
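-
-/* For example, the MS-style (Intel) operand order "mov eax, ebx" becomes
-   the AT&T order "mov ebx, eax"; with three operands the whole list is
-   reversed, which likewise exchanges the first and last operands.  */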
-
-/* Map a register name to a high level tree type for a VAR_DECL of
- that type, whose RTL will refer to the given register. */
-
-static tree
-iasm_type_for (tree arg)
-{
- tree type = NULL_TREE;
-
- if (IDENTIFIER_LENGTH (arg) > 2
- && IDENTIFIER_POINTER (arg)[0] == '%')
- {
- enum machine_mode mode = VOIDmode;
- if (IDENTIFIER_POINTER (arg)[1] == 'e')
- mode = SImode;
- else if (/* IDENTIFIER_POINTER (arg)[2] == 'h'
- || */ IDENTIFIER_POINTER (arg)[2] == 'l')
- mode = QImode;
- else if (IDENTIFIER_POINTER (arg)[2] == 'x')
- mode = HImode;
- else if (IDENTIFIER_POINTER (arg)[1] == 'r')
- mode = DImode;
- else if (IDENTIFIER_POINTER (arg)[1] == 'x')
- mode = SFmode;
- else if (IDENTIFIER_POINTER (arg)[1] == 'm')
- mode = SFmode;
-
- if (mode != VOIDmode)
- type = lang_hooks.types.type_for_mode (mode, 1);
- }
-
- return type;
-}
-
-/* We raise a named register into a VAR_DECL of an appropriate type that
-   refers to the register, so that reload doesn't run out of
-   registers.  */
-
-tree
-iasm_raise_reg (tree arg)
-{
- int regno = decode_reg_name (IDENTIFIER_POINTER (arg));
- if (regno >= 0)
- {
- tree decl = NULL_TREE;
-
- decl = lookup_name (arg);
- if (decl == error_mark_node)
- decl = 0;
- if (decl == 0)
- {
- tree type = iasm_type_for (arg);
- if (type)
- {
- decl = build_decl (VAR_DECL, arg, type);
- DECL_ARTIFICIAL (decl) = 1;
- DECL_REGISTER (decl) = 1;
- C_DECL_REGISTER (decl) = 1;
- DECL_HARD_REGISTER (decl) = 1;
- set_user_assembler_name (decl, IDENTIFIER_POINTER (arg));
- decl = lang_hooks.decls.pushdecl (decl);
- }
- }
-
- if (decl)
- return decl;
- }
-
- return arg;
-}
-
-/* Allow constants and readonly variables to be used in instructions
- in places that require constants. */
-
-static tree
-iasm_default_conv (tree e)
-{
- if (e == NULL_TREE)
- return e;
-
- if (TREE_CODE (e) == CONST_DECL)
- e = DECL_INITIAL (e);
-
- if (DECL_P (e) && DECL_MODE (e) != BLKmode)
- e = decl_constant_value (e);
- return e;
-}
-
-/* Return true iff the operand is suitable as the offset for a memory
-   instruction.  */
-
-static bool
-iasm_is_offset (tree v)
-{
- if (TREE_CODE (v) == INTEGER_CST)
- return true;
- if (TREE_CODE (v) == ADDR_EXPR)
- {
- v = TREE_OPERAND (v, 0);
- if (TREE_CODE (v) == VAR_DECL
- && TREE_STATIC (v)
- && MEM_P (DECL_RTL (v)))
- {
- note_alternative_entry_points ();
- return true;
- }
- if (TREE_CODE (v) == LABEL_DECL)
- return true;
- return false;
- }
- if (TREE_CODE (v) == VAR_DECL
- && TREE_STATIC (v)
- && MEM_P (DECL_RTL (v)))
- {
- note_alternative_entry_points ();
- return true;
- }
- if ((TREE_CODE (v) == MINUS_EXPR
- || TREE_CODE (v) == PLUS_EXPR)
- && iasm_is_offset (TREE_OPERAND (v, 0))
- && iasm_is_offset (TREE_OPERAND (v, 1)))
- return true;
- if (TREE_CODE (v) == NEGATE_EXPR
- && iasm_is_offset (TREE_OPERAND (v, 0)))
- return true;
-
- return false;
-}
-
-/* Combine two types for [] expressions. */
-
-static tree
-iasm_combine_type (tree type0, tree type1)
-{
- if (type0 == void_type_node
- || type0 == NULL_TREE)
- {
- if (type1 == void_type_node)
- return NULL_TREE;
- return type1;
- }
-
- if (type1 == void_type_node
- || type1 == NULL_TREE)
- return type0;
-
- if (type0 == type1)
- return type0;
-
- error ("too many types in []");
-
- return type0;
-}
-
-/* We canonicalize the input form of bracket expressions, as the input
-   forms are less constrained than what the assembler will accept.
-
-   TOP is the top of the canonical tree we're generating and
-   TREE_OPERAND (, 0) is the offset portion of the expression.  ARGP
-   points to the current part of the tree we're walking.
-
-   The transformations we do:
-
-   (A+O) ==> A
-   (A-O) ==> A
-   (O+A) ==> A
-
-   where the O are offset expressions.  */
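-
-/* As an illustration, an input such as [4 + ebp + 8] has both constants
-   accumulated into the offset slot TREE_OPERAND (top, 0), leaving only
-   the register behind -- i.e. the 12(%ebp) form the assembler
-   accepts.  */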
-
-static tree
-iasm_canonicalize_bracket_1 (tree* argp, tree top)
-{
- tree arg = *argp;
- tree offset = TREE_OPERAND (top, 0);
- tree arg0, arg1;
- tree rtype = NULL_TREE;
-
- *argp = arg = iasm_default_conv (arg);
-
- switch (TREE_CODE (arg))
- {
- case NOP_EXPR:
- if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE)
- {
- *argp = TREE_OPERAND (arg, 0);
- return TREE_TYPE (arg);
- }
- break;
-
- case BRACKET_EXPR:
- rtype = TREE_TYPE (arg);
- /* fall thru */
- case PLUS_EXPR:
- arg0 = TREE_OPERAND (arg, 0);
- arg1 = TREE_OPERAND (arg, 1);
-
- arg0 = iasm_default_conv (arg0);
- arg1 = iasm_default_conv (arg1);
-
- if (iasm_is_offset (arg0))
- {
- if (offset != integer_zero_node)
- arg0 = build2 (PLUS_EXPR, void_type_node, arg0, offset);
- TREE_OPERAND (top, 0) = arg0;
-
- *argp = arg1;
- if (arg1)
- return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
- }
- else if (arg1 && iasm_is_offset (arg1))
- {
- if (offset != integer_zero_node)
- arg1 = build2 (PLUS_EXPR, void_type_node, arg1, offset);
- TREE_OPERAND (top, 0) = arg1;
- *argp = arg0;
- return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
- }
- else
- {
- rtype = iasm_combine_type (rtype,
- iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top));
-
- if (arg1)
- rtype = iasm_combine_type (rtype,
- iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), top));
- if (TREE_OPERAND (arg, 0) == NULL_TREE)
- {
- if (TREE_OPERAND (arg, 1))
- {
- TREE_OPERAND (arg, 0) = TREE_OPERAND (arg, 1);
- TREE_OPERAND (arg, 1) = NULL_TREE;
- }
- else
- *argp = NULL_TREE;
- }
- else if (TREE_OPERAND (arg, 1) == NULL_TREE && rtype == NULL_TREE)
- *argp = TREE_OPERAND (arg, 0);
- if (TREE_CODE (arg) == PLUS_EXPR
- && TREE_TYPE (arg) == NULL_TREE
- && TREE_TYPE (TREE_OPERAND (arg, 0))
- && TREE_TYPE (TREE_OPERAND (arg, 1))
- && (POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1)))
- || POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0)))))
- {
- tree type = TREE_TYPE (TREE_OPERAND (arg, 1));
- if (INTEGRAL_TYPE_P (type))
- type = TREE_TYPE (TREE_OPERAND (arg, 0));
- TREE_TYPE (arg) = type;
- }
- if (TREE_CODE (arg) == PLUS_EXPR
- && TREE_TYPE (arg) == NULL_TREE
- && TREE_TYPE (TREE_OPERAND (arg, 0))
- && TREE_TYPE (TREE_OPERAND (arg, 0)) == TREE_TYPE (TREE_OPERAND (arg, 1)))
- {
- tree type = TREE_TYPE (TREE_OPERAND (arg, 0));
- TREE_TYPE (arg) = type;
- }
- }
- return rtype;
-
- case MINUS_EXPR:
- rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 0), top);
- arg0 = TREE_OPERAND (arg, 0);
- arg1 = TREE_OPERAND (arg, 1);
- arg1 = iasm_default_conv (arg1);
- if (iasm_is_offset (arg1))
- {
- offset = TREE_OPERAND (top, 0);
- if (offset == integer_zero_node)
- arg1 = fold (build1 (NEGATE_EXPR,
- TREE_TYPE (arg1),
- arg1));
- else
- arg1 = build2 (MINUS_EXPR, void_type_node, offset, arg1);
- TREE_OPERAND (top, 0) = arg1;
- *argp = arg0;
-	  return iasm_combine_type (rtype, iasm_canonicalize_bracket_1 (argp, top));
- }
- return rtype;
-
- case PARM_DECL:
- case VAR_DECL:
- {
- *argp = iasm_addr (arg);
- break;
- }
-
- case IDENTIFIER_NODE:
- {
- *argp = iasm_raise_reg (arg);
- break;
- }
-
- case MULT_EXPR:
- if (TREE_TYPE (arg) == NULL_TREE)
- {
- if (TREE_CODE (TREE_OPERAND (arg, 1)) == IDENTIFIER_NODE)
- TREE_OPERAND (arg, 1) = iasm_raise_reg (TREE_OPERAND (arg, 1));
- if (TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE)
- TREE_OPERAND (arg, 0) = iasm_raise_reg (TREE_OPERAND (arg, 0));
- if (TREE_TYPE (TREE_OPERAND (arg, 0))
- && TREE_TYPE (TREE_OPERAND (arg, 1)))
- TREE_TYPE (arg) = TREE_TYPE (TREE_OPERAND (arg, 0));
- }
- break;
-
- default:
- break;
- }
-
- return NULL_TREE;
-}
-
-/* Form an indirection for an inline asm address expression operand.
- We give a warning when we think the optimizer might have to be used
- to reform complex addresses, &stack_var + %eax + 4 for example,
- after gimplification rips the address apart. */
-
-static tree
-iasm_indirect (tree addr)
-{
- if (TREE_CODE (addr) == ADDR_EXPR
- && TREE_CODE (TREE_TYPE (TREE_OPERAND (addr, 0))) != ARRAY_TYPE
- /* && TREE_CODE (TREE_OPERAND (addr, 0)) == ARRAY_REF */)
- return TREE_OPERAND (addr, 0);
-
- addr = fold (build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr));
-
- if (! optimize && TREE_CODE (addr) == INDIRECT_REF)
- warning (0, "addressing mode too complex when not optimizing, will consume extra register(s)");
-
- return addr;
-}
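-
-/* For example (illustrative only): indirecting the address
- &stack_var + %eax + 4 when not optimizing leaves an INDIRECT_REF
- that reload can only address through a scratch register, which is
- what the warning above reports. */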
-
-/* Form an address addition for an inline asm address expression. We
- try to form ARRAY_REFs, as they will go through gimplification
- without being ripped apart. */
-
-static tree
-iasm_add (tree addr, tree off)
-{
- if (integer_zerop (off))
- return addr;
-
- /* We have to convert the offset to an int type, as we rip apart
- trees whose offset has already been converted to a pointer
- type. */
- return pointer_int_sum (PLUS_EXPR, addr, convert (integer_type_node, off));
-}
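-
-/* For example (illustrative only): iasm_add on the ADDR_EXPR for a
- stack variable X and the constant 4 builds the canonical pointer
- sum for &X + 4, first converting the offset to int so that
- pointer_int_sum does not see an offset already given a pointer
- type. */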
-
-/* We canonicalize the input form of bracket expressions, as the input
- forms are less constrained than what the assembler will accept. */
-
-static tree
-iasm_canonicalize_bracket (tree arg)
-{
- tree rtype;
-
- gcc_assert (TREE_CODE (arg) == BRACKET_EXPR);
-
- /* Let the normal operand printer output this without trying to
- decompose it into parts so that things like (%esp + 20) + 4 can
- be output as 24(%esp) by the optimizer instead of 4(%0) and
- burning an "R" with (%esp + 20). */
- if (TREE_OPERAND (arg, 1) == NULL_TREE
- && TREE_TYPE (TREE_OPERAND (arg, 0))
- && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 0))))
- {
- if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL
- || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL)
- return arg;
- return iasm_indirect (TREE_OPERAND (arg, 0));
- }
-
- /* Ensure that operand 0 is an offset. */
- if (TREE_OPERAND (arg, 0)
- && iasm_is_offset (TREE_OPERAND (arg, 0)))
- {
- /* We win if operand 0 is an offset already. */
- }
- else if (TREE_OPERAND (arg, 1) == NULL_TREE)
- {
- /* Move operand 0 to operand 1 if operand 1 is empty and operand 0 isn't already an offset. */
- TREE_OPERAND (arg, 1) = TREE_OPERAND (arg, 0);
- TREE_OPERAND (arg, 0) = integer_zero_node;
- }
- else
- {
- tree swp;
- /* Just have to force it now */
- swp = iasm_build_bracket (TREE_OPERAND (arg, 0), TREE_OPERAND (arg, 1));
- TREE_OPERAND (arg, 0) = integer_zero_node;
- TREE_OPERAND (arg, 1) = swp;
- }
-
- if (TREE_OPERAND (arg, 1))
- {
- rtype = iasm_canonicalize_bracket_1 (&TREE_OPERAND (arg, 1), arg);
- if (rtype)
- TREE_TYPE (arg) = iasm_combine_type (TREE_TYPE (arg), rtype);
- }
-
- /* For correctness, pointer types should be raised to the tree
- level, as they denote address calculations with stack-based
- objects, and we want print_operand to print the entire address so
- that it can combine constants and hard registers into the address.
- Unfortunately we might have to rely upon the optimizer to reform
- the address after the gimplification pass rips it apart. */
-
- /* Handle [INTEGER_CST][ptr][op3] */
- if (TREE_OPERAND (arg, 1)
- && TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST
- && TREE_CODE (TREE_OPERAND (arg, 1)) == BRACKET_EXPR
- && TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))
- && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0)))
- && TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))) != void_type_node
- && (TREE_TYPE (arg) == void_type_node
- || (TREE_TYPE (arg) == get_identifier ("word")
- && (TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (arg, 1), 0))))
- == HImode))))
- {
- tree op3 = TREE_OPERAND (TREE_OPERAND (arg, 1), 1);
- tree addr = iasm_add (TREE_OPERAND (TREE_OPERAND (arg, 1), 0),
- TREE_OPERAND (arg, 0));
- tree type;
- addr = iasm_indirect (addr);
- if (op3 == NULL_TREE)
- return addr;
- type = TREE_TYPE (addr);
- type = build_pointer_type (type);
- addr = build1 (ADDR_EXPR, type, addr);
- addr = fold (build2 (PLUS_EXPR, type, addr, op3));
- return iasm_indirect (addr);
- }
-
- /* Handle ptr + INTEGER_CST */
- if (TREE_OPERAND (arg, 1)
- && TREE_TYPE (arg) == void_type_node
- && TREE_TYPE (TREE_OPERAND (arg, 1))
- && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (arg, 1)))
- && TREE_TYPE (TREE_TYPE (TREE_OPERAND (arg, 1))) != void_type_node)
- {
- if (TREE_CODE (TREE_OPERAND (arg, 1)) == ADDR_EXPR)
- {
- if (TREE_OPERAND (arg, 0) == integer_zero_node)
- return TREE_OPERAND (TREE_OPERAND (arg, 1), 0);
- if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST)
- return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0)));
- }
- if (TREE_CODE (TREE_OPERAND (arg, 1)) == PLUS_EXPR)
- {
- if (TREE_OPERAND (arg, 0) == integer_zero_node)
- return iasm_indirect (TREE_OPERAND (arg, 1));
- if (TREE_CODE (TREE_OPERAND (arg, 0)) == INTEGER_CST)
- return iasm_indirect (iasm_add (TREE_OPERAND (arg, 1), TREE_OPERAND (arg, 0)));
- }
- }
- return arg;
-}
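-
-/* Illustrative examples of the canonical forms (not part of the
- original source): [ebx] comes out with integer_zero_node as its
- offset operand and the raised register as operand 1, while a form
- like [4 + x] over a stack variable X collapses through iasm_add and
- iasm_indirect into a single indirection so that one combined
- address is printed. */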
-
-/* We canonicalize the instruction by swapping operands and rewriting
- the opcode if the output style is AT&T syntax. */
-
-tree
-iasm_x86_canonicalize_operands (const char **opcode_p, tree iargs, void *ep)
-{
- iasm_md_extra_info *e = ep;
- static char buf[40];
- tree args = iargs;
- int argnum = 1;
- const char *opcode = *opcode_p;
- bool fp_style = false;
- bool fpi_style = false;
-
- /* Don't transform if output format is the same as input format. */
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- return iargs;
-
- if (strncasecmp (opcode, "f", 1) == 0)
- fp_style = true;
-
- if (fp_style
- && strncasecmp (opcode+1, "i", 1) == 0)
- fpi_style = true;
-
- while (args)
- {
- tree arg = TREE_VALUE (args);
-
- /* Handle floating-point register references like st(3). */
- if (TREE_CODE (arg) == COMPOUND_EXPR
- && TREE_CODE (TREE_OPERAND (arg, 0)) == IDENTIFIER_NODE
- && strcasecmp (IDENTIFIER_POINTER (TREE_OPERAND (arg, 0)), "%st") == 0
- && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST)
- {
- int v = tree_low_cst (TREE_OPERAND (arg, 1), 0);
-
- if (v < 0 || v > 7)
- {
- error ("unknown floating point register st(%d)", v);
- v = 0;
- }
-
- /* Rewrite %st(0) to %st. */
- if (v == 0)
- TREE_VALUE (args) = TREE_OPERAND (arg, 0);
- else
- {
- char buf[20];
- sprintf (buf, "%%st(%d)", v);
- TREE_VALUE (args) = get_identifier (buf);
- }
- }
- else if (TREE_CODE (arg) == BRACKET_EXPR)
- TREE_VALUE (args) = arg = iasm_canonicalize_bracket (arg);
-
- switch (TREE_CODE (arg))
- {
- case ARRAY_REF:
- case VAR_DECL:
- case PARM_DECL:
- case INDIRECT_REF:
- if (TYPE_MODE (TREE_TYPE (arg)) == QImode)
- e->mod[argnum-1] = 'b';
- else if (TYPE_MODE (TREE_TYPE (arg)) == HImode)
- e->mod[argnum-1] = fpi_style ? 's' : 'w';
- else if (TYPE_MODE (TREE_TYPE (arg)) == SImode)
- e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l');
- else if (TYPE_MODE (TREE_TYPE (arg)) == DImode)
- e->mod[argnum-1] = 'q';
- else if (TYPE_MODE (TREE_TYPE (arg)) == SFmode)
- e->mod[argnum-1] = 's';
- else if (TYPE_MODE (TREE_TYPE (arg)) == DFmode)
- e->mod[argnum-1] = 'l';
- else if (TYPE_MODE (TREE_TYPE (arg)) == XFmode)
- e->mod[argnum-1] = 't';
- break;
- case BRACKET_EXPR:
- /* We use the TREE_TYPE to indicate the type of the operand; it
- is set by code like: inc dword ptr [eax]. */
- if (TREE_CODE (TREE_TYPE (arg)) == IDENTIFIER_NODE)
- {
- const char *s = IDENTIFIER_POINTER (TREE_TYPE (arg));
- if (strcasecmp (s, "byte") == 0)
- e->mod[argnum-1] = 'b';
- else if (strcasecmp (s, "word") == 0)
- e->mod[argnum-1] = fpi_style ? 's' : 'w';
- else if (strcasecmp (s, "dword") == 0)
- e->mod[argnum-1] = fpi_style ? 'l' : (fp_style ? 's' : 'l');
- else if (strcasecmp (s, "qword") == 0)
- e->mod[argnum-1] = 'q';
- else if (strcasecmp (s, "real4") == 0)
- e->mod[argnum-1] = 's';
- else if (strcasecmp (s, "real8") == 0)
- e->mod[argnum-1] = 'l';
- else if (strcasecmp (s, "real10") == 0)
- e->mod[argnum-1] = 't';
- else if (strcasecmp (s, "tbyte") == 0)
- e->mod[argnum-1] = 't';
- }
- break;
- case LABEL_DECL:
- e->mod[argnum-1] = 'l';
- break;
- case IDENTIFIER_NODE:
- if (IDENTIFIER_LENGTH (arg) > 2
- && IDENTIFIER_POINTER (arg)[0] == '%')
- {
- if (IDENTIFIER_POINTER (arg)[1] == 'e')
- e->mod[argnum-1] = 'l';
- else if (IDENTIFIER_POINTER (arg)[2] == 'h'
- || IDENTIFIER_POINTER (arg)[2] == 'l')
- e->mod[argnum-1] = 'b';
- else if (IDENTIFIER_POINTER (arg)[2] == 'x')
- e->mod[argnum-1] = 'w';
- }
- break;
- default:
- break;
- }
- args = TREE_CHAIN (args);
- ++argnum;
- }
- --argnum;
-
- args = iasm_x86_swap_operands (opcode, iargs);
- if (opcode[0] == ' ' && iasm_is_pseudo (opcode+1))
- e->pseudo = true;
-
- if (strcasecmp (opcode, "movs") == 0
- || strcasecmp (opcode, "scas") == 0
- || strcasecmp (opcode, "stos") == 0
- || strcasecmp (opcode, "xlat") == 0)
- args = NULL_TREE;
- else if (strcasecmp (opcode, "cmovpo") == 0)
- opcode = "cmovnp";
- else if (strcasecmp (opcode, "cmovpe") == 0)
- opcode = "cmovp";
- else if (strcasecmp (opcode, "outs") == 0
- && TREE_CHAIN (args))
- {
- e->mod[0] = e->mod[1];
- }
- else if (strcasecmp (opcode, "ins") == 0
- && TREE_CHAIN (args))
- {
- e->mod[1] = 0;
- }
- /* movsx isn't part of the AT&T syntax; there it is spelled movs. */
- else if (strcasecmp (opcode, "movsx") == 0)
- opcode = "movs";
- else if (strcasecmp (opcode, "pushfd") == 0)
- *opcode_p = "pushf";
- else if (strcasecmp (opcode, "popfd") == 0)
- *opcode_p = "popf";
-
- /* movzx isn't part of the AT&T syntax; there it is spelled movz. */
- if (strcasecmp (opcode, "movzx") == 0)
- {
- /* Silly extension of the day: a zero-extended move that has the
- same size before and after is accepted and is just a normal
- move. */
- if (argnum == 2
- && (e->mod[0] == e->mod[1]
- || e->mod[1] == 0))
- opcode = "mov";
- else
- opcode = "movz";
- }
-
- if (strncasecmp (opcode, "f", 1) == 0
- && strcasecmp (opcode, "fldcw") != 0)
- {
- if (e->mod[0] == 'w')
- e->mod[0] = 's';
- if (e->mod[1] == 'w')
- e->mod[1] = 's';
- }
- else if (strcasecmp (opcode, "mov") == 0)
- {
- /* The 32-bit integer instructions can be used on floats. */
- if (e->mod[0] == 's')
- e->mod[0] = 'l';
- if (e->mod[1] == 's')
- e->mod[1] = 'l';
- }
-
- if (e->pseudo)
- e->mod[0] = e->mod[1] = 0;
- else if (strcasecmp (opcode, "clflush") == 0
- || strcasecmp (opcode, "fbld") == 0
- || strcasecmp (opcode, "fbstp") == 0
- || strcasecmp (opcode, "fldt") == 0
- || strcasecmp (opcode, "fnstcw") == 0
- || strcasecmp (opcode, "fnstsw") == 0
- || strcasecmp (opcode, "fstcw") == 0
- || strcasecmp (opcode, "fstsw") == 0
- || strcasecmp (opcode, "fxrstor") == 0
- || strcasecmp (opcode, "fxsave") == 0
- || strcasecmp (opcode, "invlpg") == 0
- || strcasecmp (opcode, "jmp") == 0
- || strcasecmp (opcode, "call") == 0
- || strcasecmp (opcode, "ja") == 0
- || strcasecmp (opcode, "jae") == 0
- || strcasecmp (opcode, "jb") == 0
- || strcasecmp (opcode, "jbe") == 0
- || strcasecmp (opcode, "jc") == 0
- || strcasecmp (opcode, "je") == 0
- || strcasecmp (opcode, "jg") == 0
- || strcasecmp (opcode, "jge") == 0
- || strcasecmp (opcode, "jl") == 0
- || strcasecmp (opcode, "jle") == 0
- || strcasecmp (opcode, "jna") == 0
- || strcasecmp (opcode, "jnae") == 0
- || strcasecmp (opcode, "jnb") == 0
- || strcasecmp (opcode, "jnc") == 0
- || strcasecmp (opcode, "jne") == 0
- || strcasecmp (opcode, "jng") == 0
- || strcasecmp (opcode, "jnge") == 0
- || strcasecmp (opcode, "jnl") == 0
- || strcasecmp (opcode, "jnle") == 0
- || strcasecmp (opcode, "jno") == 0
- || strcasecmp (opcode, "jnp") == 0
- || strcasecmp (opcode, "jns") == 0
- || strcasecmp (opcode, "jnz") == 0
- || strcasecmp (opcode, "jo") == 0
- || strcasecmp (opcode, "jp") == 0
- || strcasecmp (opcode, "jpe") == 0
- || strcasecmp (opcode, "jpo") == 0
- || strcasecmp (opcode, "js") == 0
- || strcasecmp (opcode, "jz") == 0
- || strcasecmp (opcode, "ldmxcsr") == 0
- || strcasecmp (opcode, "lgdt") == 0
- || strcasecmp (opcode, "lidt") == 0
- || strcasecmp (opcode, "lldt") == 0
- || strcasecmp (opcode, "lmsw") == 0
- || strcasecmp (opcode, "ltr") == 0
- || strcasecmp (opcode, "movapd") == 0
- || strcasecmp (opcode, "movaps") == 0
- || strcasecmp (opcode, "movd") == 0
- || strcasecmp (opcode, "movhpd") == 0
- || strcasecmp (opcode, "movhps") == 0
- || strcasecmp (opcode, "movlpd") == 0
- || strcasecmp (opcode, "movlps") == 0
- || strcasecmp (opcode, "movntdq") == 0
- || strcasecmp (opcode, "movntpd") == 0
- || strcasecmp (opcode, "movntps") == 0
- || strcasecmp (opcode, "movntq") == 0
- || strcasecmp (opcode, "movq") == 0
- || strcasecmp (opcode, "movsd") == 0
- || strcasecmp (opcode, "movss") == 0
- || strcasecmp (opcode, "movupd") == 0
- || strcasecmp (opcode, "movups") == 0
- || strcasecmp (opcode, "out") == 0
- || strcasecmp (opcode, "prefetchnta") == 0
- || strcasecmp (opcode, "prefetcht0") == 0
- || strcasecmp (opcode, "prefetcht1") == 0
- || strcasecmp (opcode, "prefetcht2") == 0
- || strcasecmp (opcode, "seta") == 0
- || strcasecmp (opcode, "setae") == 0
- || strcasecmp (opcode, "setb") == 0
- || strcasecmp (opcode, "setbe") == 0
- || strcasecmp (opcode, "setc") == 0
- || strcasecmp (opcode, "sete") == 0
- || strcasecmp (opcode, "setg") == 0
- || strcasecmp (opcode, "setge") == 0
- || strcasecmp (opcode, "setl") == 0
- || strcasecmp (opcode, "setle") == 0
- || strcasecmp (opcode, "setna") == 0
- || strcasecmp (opcode, "setnae") == 0
- || strcasecmp (opcode, "setnb") == 0
- || strcasecmp (opcode, "setnbe") == 0
- || strcasecmp (opcode, "setnc") == 0
- || strcasecmp (opcode, "setne") == 0
- || strcasecmp (opcode, "setng") == 0
- || strcasecmp (opcode, "setnge") == 0
- || strcasecmp (opcode, "setnl") == 0
- || strcasecmp (opcode, "setnle") == 0
- || strcasecmp (opcode, "setno") == 0
- || strcasecmp (opcode, "setnp") == 0
- || strcasecmp (opcode, "setns") == 0
- || strcasecmp (opcode, "setnz") == 0
- || strcasecmp (opcode, "seto") == 0
- || strcasecmp (opcode, "setp") == 0
- || strcasecmp (opcode, "setpe") == 0
- || strcasecmp (opcode, "setpo") == 0
- || strcasecmp (opcode, "sets") == 0
- || strcasecmp (opcode, "setz") == 0
- || strcasecmp (opcode, "sldt") == 0
- || strcasecmp (opcode, "smsw") == 0
- || strcasecmp (opcode, "stmxcsr") == 0
- || strcasecmp (opcode, "str") == 0
- || strcasecmp (opcode, "xlat") == 0)
- e->mod[0] = 0;
- else if (strcasecmp (opcode, "lea") == 0
- || strcasecmp (opcode, "rcl") == 0
- || strcasecmp (opcode, "rcr") == 0
- || strcasecmp (opcode, "rol") == 0
- || strcasecmp (opcode, "ror") == 0
- || strcasecmp (opcode, "sal") == 0
- || strcasecmp (opcode, "sar") == 0
- || strcasecmp (opcode, "shl") == 0
- || strcasecmp (opcode, "shr") == 0)
- e->mod[1] = 0;
-
- if ((argnum == 1 && e->mod[0])
- || (argnum == 2 && e->mod[0]
- && (e->mod[0] == e->mod[1]
- || e->mod[1] == 0)))
- {
- sprintf (buf, "%s%c", opcode, e->mod[0]);
- *opcode_p = buf;
- }
- else if (argnum == 2 && e->mod[0] && e->mod[1])
- {
- sprintf (buf, "%s%c%c", opcode, e->mod[1], e->mod[0]);
- *opcode_p = buf;
- }
-
- return args;
-}
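-
-/* Worked examples of the rewriting above (illustrative only), for
- AT&T output:
- mov eax, [ebx] -> operands swapped, with an 'l' suffix derived
- from %eax, giving "movl"
- movzx eax, bl -> "movzbl", the source and destination suffixes
- appended to the AT&T spelling "movz"
- st(0) -> rewritten to plain %st
- Nothing here runs when the output dialect is already Intel. */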
-
-/* Character used to separate the prefix words. */
-/* See radr://4141844 for the enhancement to make this uniformly ' '. */
-#define IASM_PREFIX_SEP '/'
-
-void
-iasm_x86_print_prefix (char *buf, tree prefix_list)
-{
- buf += strlen (buf);
- while (prefix_list)
- {
- tree prefix = TREE_VALUE (prefix_list);
- size_t len = IDENTIFIER_LENGTH (prefix);
- memcpy (buf, IDENTIFIER_POINTER (prefix), len);
- buf += len;
- buf[0] = IASM_PREFIX_SEP;
- ++buf;
- buf[0] = 0;
- prefix_list = TREE_CHAIN (prefix_list);
- }
-}
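-
-/* For example (illustrative only): a prefix list holding "lock"
- appends "lock/" to BUF, the '/' separator standing in for a space
- pending the radar cited above. */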
-
-/* Warn when a variable's address is used to form a memory address
- that will consume an extra register during reload. */
-
-static void
-iasm_warn_extra_reg (tree arg)
-{
- if (TREE_CODE (arg) == ADDR_EXPR
- && (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL
- || TREE_CODE (TREE_OPERAND (arg, 0)) == PARM_DECL))
- warning (0, "addressing mode too complex, will consume an extra register");
-}
-
-bool
-iasm_print_op (char *buf, tree arg, unsigned argnum, tree *uses,
- bool must_be_reg, bool must_not_be_reg, void *ep)
-{
- iasm_md_extra_info *e = ep;
- switch (TREE_CODE (arg))
- {
- case BRACKET_EXPR:
- {
- tree op1 = TREE_OPERAND (arg, 0);
- tree op2 = TREE_OPERAND (arg, 1);
- tree op0 = NULL_TREE, op3 = NULL_TREE;
- tree scale = NULL_TREE;
-
- if (op2 == NULL_TREE
- && TREE_TYPE (op1)
- && POINTER_TYPE_P (TREE_TYPE (op1)))
- {
- /* Let the normal operand printer output this without trying to
- decompose it into parts so that things like (%esp + 20) + 4
- can be output as 24(%esp) by the optimizer instead of 4(%0)
- and burning an "R" with (%esp + 20). */
- iasm_force_constraint ("m", e);
- iasm_get_register_var (op1, "", buf, argnum, must_be_reg, e);
- iasm_force_constraint (0, e);
- break;
- }
-
- if (op2
- && TREE_CODE (op2) == BRACKET_EXPR)
- {
- op3 = TREE_OPERAND (op2, 1);
- op2 = TREE_OPERAND (op2, 0);
- if (TREE_CODE (op2) == BRACKET_EXPR)
- {
- op0 = TREE_OPERAND (op2, 1);
- op2 = TREE_OPERAND (op2, 0);
- }
- }
- if (op0)
- return false;
-
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- strcat (buf, "[");
-
- if (op3 == NULL_TREE
- && op2 && TREE_CODE (op2) == PLUS_EXPR)
- {
- op3 = TREE_OPERAND (op2, 0);
- op2 = TREE_OPERAND (op2, 1);
- }
- if (op2 && TREE_CODE (op2) == MULT_EXPR)
- {
- tree t;
- t = op3;
- op3 = op2;
- op2 = t;
- }
-
- /* Crack out the scaling, if any. */
- if (ASSEMBLER_DIALECT == ASM_ATT
- && op3
- && TREE_CODE (op3) == MULT_EXPR)
- {
- if (TREE_CODE (TREE_OPERAND (op3, 1)) == INTEGER_CST)
- {
- scale = TREE_OPERAND (op3, 1);
- op3 = TREE_OPERAND (op3, 0);
- }
- else if (TREE_CODE (TREE_OPERAND (op3, 0)) == INTEGER_CST)
- {
- scale = TREE_OPERAND (op3, 0);
- op3 = TREE_OPERAND (op3, 1);
- }
- }
-
- /* Complicated expression as JMP or CALL target. */
- if (e->modifier && strcmp(e->modifier, "A") == 0)
- {
- strcat (buf, "*");
- e->modifier = 0;
- }
- e->as_immediate = true;
- iasm_print_operand (buf, op1, argnum, uses,
- must_be_reg, must_not_be_reg, e);
- e->as_immediate = false;
-
- /* Just an immediate. */
- if (op2 == NULL_TREE && op3 == NULL_TREE)
- break;
-
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- strcat (buf, "]");
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- strcat (buf, "[");
- else
- strcat (buf, "(");
-
- if (op2)
- {
- /* We know by context that this has to be an "R". */
- iasm_force_constraint ("R", e);
- iasm_warn_extra_reg (op2);
- iasm_print_operand (buf, op2, argnum, uses,
- must_be_reg, must_not_be_reg, e);
- iasm_force_constraint (0, e);
- }
- if (op3)
- {
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- strcat (buf, "][");
- else
- strcat (buf, ",");
-
- /* We know by context that this has to be an "l". */
- iasm_force_constraint ("l", e);
- iasm_warn_extra_reg (op3);
- iasm_print_operand (buf, op3, argnum, uses,
- must_be_reg, must_not_be_reg, e);
- iasm_force_constraint (0, e);
- if (scale)
- {
- strcat (buf, ",");
- e->as_immediate = true;
- iasm_print_operand (buf, scale, argnum, uses,
- must_be_reg, must_not_be_reg, e);
- e->as_immediate = false;
- }
- }
- if (ASSEMBLER_DIALECT == ASM_INTEL)
- strcat (buf, "]");
- else
- strcat (buf, ")");
- }
- break;
-
- case ADDR_EXPR:
- if ((TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF
- || TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
- && ! e->as_immediate)
- {
- iasm_get_register_var (arg, "", buf, argnum, must_be_reg, e);
- break;
- }
- if (! e->as_immediate)
- e->as_offset = true;
- iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses,
- must_be_reg, must_not_be_reg, e);
- e->as_offset = false;
- break;
-
- case MULT_EXPR:
- iasm_print_operand (buf, TREE_OPERAND (arg, 0), argnum, uses,
- must_be_reg, must_not_be_reg, e);
- strcat (buf, "*");
- iasm_print_operand (buf, TREE_OPERAND (arg, 1), argnum, uses,
- must_be_reg, must_not_be_reg, e);
- break;
- default:
- return false;
- }
- return true;
-}
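-
-/* Illustrative output (not part of the original source): the bracket
- expression [ebx + ecx*2 + 8] prints as "8(%ebx,%ecx,2)" in AT&T
- syntax; the scale is only cracked out of the MULT_EXPR for the
- AT&T dialect, while Intel output keeps the '*' form. */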
-/* APPLE LOCAL end CW asm blocks */
-
-/* Return the mangling of TYPE if it is an extended fundamental type. */
-
-static const char *
-/* APPLE LOCAL mangle_type 7105099 */
-ix86_mangle_type (tree type)
-{
- /* APPLE LOCAL begin mangle_type 7105099 */
- type = TYPE_MAIN_VARIANT (type);
-
- if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
- && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
- return NULL;
-
- /* APPLE LOCAL end mangle_type 7105099 */
- switch (TYPE_MODE (type))
- {
- case TFmode:
- /* __float128 is "g". */
- return "g";
- case XFmode:
- /* "long double" or __float80 is "e". */
- return "e";
- default:
- return NULL;
- }
-}
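-
-/* For example (illustrative only): on IA-32 "long double" has XFmode,
- so void f (long double) mangles as _Z1fe, while a __float128
- parameter (TFmode) contributes "g". */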
-
-/* For 32-bit code we can save PIC register setup by using
- __stack_chk_fail_local hidden function instead of calling
- __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
- register, so it is better to call __stack_chk_fail directly. */
-
-static tree
-ix86_stack_protect_fail (void)
-{
- return TARGET_64BIT
- ? default_external_stack_protect_fail ()
- : default_hidden_stack_protect_fail ();
-}
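-
-/* Sketch of the effect (illustrative only): in 32-bit code the
- failure path becomes a call to the hidden __stack_chk_fail_local,
- which binds locally and so needs no PIC register setup, unlike a
- call through the PLT to __stack_chk_fail. */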
-
-/* Select a format to encode pointers in exception handling data. CODE
- is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
- true if the symbol may be affected by dynamic relocations.
-
- ??? All x86 object file formats are capable of representing this.
- After all, the relocation needed is the same as for the call insn.
- Whether or not a particular assembler allows us to enter such, I
- guess we'll have to see. */
-int
-asm_preferred_eh_data_format (int code, int global)
-{
- if (flag_pic)
- {
- int type = DW_EH_PE_sdata8;
- if (!TARGET_64BIT
- || ix86_cmodel == CM_SMALL_PIC
- || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
- type = DW_EH_PE_sdata4;
- return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
- }
- if (ix86_cmodel == CM_SMALL
- || (ix86_cmodel == CM_MEDIUM && code))
- return DW_EH_PE_udata4;
- return DW_EH_PE_absptr;
-}
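-
-/* Worked example (illustrative only): for 32-bit PIC data that
- references a global symbol this returns DW_EH_PE_indirect
- | DW_EH_PE_pcrel | DW_EH_PE_sdata4, an indirect pc-relative signed
- 4-byte encoding; non-PIC code gets DW_EH_PE_udata4 for the small
- model and DW_EH_PE_absptr otherwise. */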
-
-#include "gt-i386.h"