diff options
author | Dan Albert <danalbert@google.com> | 2015-06-17 11:09:54 -0700 |
---|---|---|
committer | Dan Albert <danalbert@google.com> | 2015-06-17 14:15:22 -0700 |
commit | f378ebf14df0952eae870c9865bab8326aa8f137 (patch) | |
tree | 31794503eb2a8c64ea5f313b93100f1163afcffb /gcc-4.4.3/gcc/config/i386/i386.c | |
parent | 2c58169824949d3a597d9fa81931e001ef9b1bd0 (diff) | |
download | toolchain_gcc-f378ebf14df0952eae870c9865bab8326aa8f137.tar.gz toolchain_gcc-f378ebf14df0952eae870c9865bab8326aa8f137.tar.bz2 toolchain_gcc-f378ebf14df0952eae870c9865bab8326aa8f137.zip |
Delete old versions of GCC.
Change-Id: I710f125d905290e1024cbd67f48299861790c66c
Diffstat (limited to 'gcc-4.4.3/gcc/config/i386/i386.c')
-rw-r--r-- | gcc-4.4.3/gcc/config/i386/i386.c | 30965 |
1 files changed, 0 insertions, 30965 deletions
diff --git a/gcc-4.4.3/gcc/config/i386/i386.c b/gcc-4.4.3/gcc/config/i386/i386.c deleted file mode 100644 index 2ca822040..000000000 --- a/gcc-4.4.3/gcc/config/i386/i386.c +++ /dev/null @@ -1,30965 +0,0 @@ -/* Subroutines used for code generation on IA-32. - Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 - Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "rtl.h" -#include "tree.h" -#include "tm_p.h" -#include "regs.h" -#include "hard-reg-set.h" -#include "real.h" -#include "insn-config.h" -#include "conditions.h" -#include "output.h" -#include "insn-codes.h" -#include "insn-attr.h" -#include "flags.h" -#include "c-common.h" -#include "except.h" -#include "function.h" -#include "recog.h" -#include "expr.h" -#include "optabs.h" -#include "toplev.h" -#include "basic-block.h" -#include "ggc.h" -#include "target.h" -#include "target-def.h" -#include "langhooks.h" -#include "cgraph.h" -#include "gimple.h" -#include "dwarf2.h" -#include "df.h" -#include "tm-constrs.h" -#include "params.h" -#include "cselib.h" - -static int x86_builtin_vectorization_cost (bool); -static rtx legitimize_dllimport_symbol (rtx, bool); - -#ifndef CHECK_STACK_LIMIT -#define CHECK_STACK_LIMIT (0x1000) -#endif - -/* Return index of given mode in mult and division cost tables. */ -#define MODE_INDEX(mode) \ - ((mode) == QImode ? 0 \ - : (mode) == HImode ? 1 \ - : (mode) == SImode ? 2 \ - : (mode) == DImode ? 3 \ - : 4) - -/* Processor costs (relative to an add) */ -/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */ -#define COSTS_N_BYTES(N) ((N) * 2) - -#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}} - -const -struct processor_costs ix86_size_cost = {/* costs for tuning for size */ - COSTS_N_BYTES (2), /* cost of an add instruction */ - COSTS_N_BYTES (3), /* cost of a lea instruction */ - COSTS_N_BYTES (2), /* variable shift costs */ - COSTS_N_BYTES (3), /* constant shift costs */ - {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ - COSTS_N_BYTES (3), /* HI */ - COSTS_N_BYTES (3), /* SI */ - COSTS_N_BYTES (3), /* DI */ - COSTS_N_BYTES (5)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ - COSTS_N_BYTES (3), /* HI */ - COSTS_N_BYTES (3), /* SI */ - COSTS_N_BYTES (3), /* DI */ - COSTS_N_BYTES (5)}, /* other */ - COSTS_N_BYTES (3), /* cost of movsx */ - COSTS_N_BYTES (3), /* cost of movzx */ - 0, /* "large" insn */ - 2, /* MOVE_RATIO */ - 2, /* cost for loading QImode using movzbl */ - {2, 2, 2}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 2, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 2}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {2, 2, 2}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 3, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers - in SImode and DImode */ - {3, 3}, /* cost of storing MMX registers - in SImode and DImode */ - 3, /* cost of moving SSE register */ - {3, 3, 3}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {3, 3, 3}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 3, /* MMX or SSE register to integer */ - 0, /* size of l1 cache */ - 0, /* size of l2 cache */ - 0, /* size of prefetch block */ - 0, /* number of parallel prefetches */ - 2, /* Branch cost */ - COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ - COSTS_N_BYTES (2), /* cost of FMUL instruction. */ - COSTS_N_BYTES (2), /* cost of FDIV instruction. */ - COSTS_N_BYTES (2), /* cost of FABS instruction. */ - COSTS_N_BYTES (2), /* cost of FCHS instruction. */ - COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, - {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, - {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 1, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 1, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -/* Processor costs (relative to an add) */ -static const -struct processor_costs i386_cost = { /* 386 specific costs */ - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1), /* cost of a lea instruction */ - COSTS_N_INSNS (3), /* variable shift costs */ - COSTS_N_INSNS (2), /* constant shift costs */ - {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ - COSTS_N_INSNS (6), /* HI */ - COSTS_N_INSNS (6), /* SI */ - COSTS_N_INSNS (6), /* DI */ - COSTS_N_INSNS (6)}, /* other */ - COSTS_N_INSNS (1), /* cost of multiply per each bit set */ - {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (23), /* HI */ - COSTS_N_INSNS (23), /* SI */ - COSTS_N_INSNS (23), /* DI */ - COSTS_N_INSNS (23)}, /* other */ - COSTS_N_INSNS (3), /* cost of movsx */ - COSTS_N_INSNS (2), /* cost of movzx */ - 15, /* "large" insn */ - 3, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {2, 4, 2}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {8, 8, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {8, 8, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {4, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 8}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {4, 8, 16}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 8, 16}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 3, /* MMX or SSE register to integer */ - 0, /* size of l1 cache */ - 0, /* size of l2 cache */ - 0, /* size of prefetch block */ - 0, /* number of parallel prefetches */ - 1, /* Branch cost */ - COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (27), /* cost of FMUL instruction. */ - COSTS_N_INSNS (88), /* cost of FDIV instruction. */ - COSTS_N_INSNS (22), /* cost of FABS instruction. */ - COSTS_N_INSNS (24), /* cost of FCHS instruction. */ - COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs i486_cost = { /* 486 specific costs */ - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1), /* cost of a lea instruction */ - COSTS_N_INSNS (3), /* variable shift costs */ - COSTS_N_INSNS (2), /* constant shift costs */ - {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ - COSTS_N_INSNS (12), /* HI */ - COSTS_N_INSNS (12), /* SI */ - COSTS_N_INSNS (12), /* DI */ - COSTS_N_INSNS (12)}, /* other */ - 1, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (40), /* HI */ - COSTS_N_INSNS (40), /* SI */ - COSTS_N_INSNS (40), /* DI */ - COSTS_N_INSNS (40)}, /* other */ - COSTS_N_INSNS (3), /* cost of movsx */ - COSTS_N_INSNS (2), /* cost of movzx */ - 15, /* "large" insn */ - 3, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {2, 4, 2}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {8, 8, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {8, 8, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {4, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 8}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {4, 8, 16}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 8, 16}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 3, /* MMX or SSE register to integer */ - 4, /* size of l1 cache. 486 has 8kB cache - shared for code and data, so 4kB is - not really precise. */ - 4, /* size of l2 cache */ - 0, /* size of prefetch block */ - 0, /* number of parallel prefetches */ - 1, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (16), /* cost of FMUL instruction. */ - COSTS_N_INSNS (73), /* cost of FDIV instruction. */ - COSTS_N_INSNS (3), /* cost of FABS instruction. */ - COSTS_N_INSNS (3), /* cost of FCHS instruction. */ - COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ - {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs pentium_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1), /* cost of a lea instruction */ - COSTS_N_INSNS (4), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ - COSTS_N_INSNS (11), /* HI */ - COSTS_N_INSNS (11), /* SI */ - COSTS_N_INSNS (11), /* DI */ - COSTS_N_INSNS (11)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (25), /* HI */ - COSTS_N_INSNS (25), /* SI */ - COSTS_N_INSNS (25), /* DI */ - COSTS_N_INSNS (25)}, /* other */ - COSTS_N_INSNS (3), /* cost of movsx */ - COSTS_N_INSNS (2), /* cost of movzx */ - 8, /* "large" insn */ - 6, /* MOVE_RATIO */ - 6, /* cost for loading QImode using movzbl */ - {2, 4, 2}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 4, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 8, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {8, 8}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {4, 8, 16}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 8, 16}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 3, /* MMX or SSE register to integer */ - 8, /* size of l1 cache. */ - 8, /* size of l2 cache */ - 0, /* size of prefetch block */ - 0, /* number of parallel prefetches */ - 2, /* Branch cost */ - COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (3), /* cost of FMUL instruction. */ - COSTS_N_INSNS (39), /* cost of FDIV instruction. */ - COSTS_N_INSNS (1), /* cost of FABS instruction. */ - COSTS_N_INSNS (1), /* cost of FCHS instruction. */ - COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{-1, rep_prefix_4_byte}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs pentiumpro_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1), /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (4), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (4)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (17), /* HI */ - COSTS_N_INSNS (17), /* SI */ - COSTS_N_INSNS (17), /* DI */ - COSTS_N_INSNS (17)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 6, /* MOVE_RATIO */ - 2, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 2, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {2, 2}, /* cost of loading MMX registers - in SImode and DImode */ - {2, 2}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {2, 2, 8}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {2, 2, 8}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 3, /* MMX or SSE register to integer */ - 8, /* size of l1 cache. */ - 256, /* size of l2 cache */ - 32, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 2, /* Branch cost */ - COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (5), /* cost of FMUL instruction. */ - COSTS_N_INSNS (56), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ - /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure - the alignment). For small blocks inline loop is still a noticeable win, for bigger - blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently - more expensive startup time in CPU, but after 4K the difference is down in the noise. - */ - {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop}, - {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}}, - DUMMY_STRINGOP_ALGS}, - {{rep_prefix_4_byte, {{1024, unrolled_loop}, - {8192, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs geode_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1), /* cost of a lea instruction */ - COSTS_N_INSNS (2), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (7), /* SI */ - COSTS_N_INSNS (7), /* DI */ - COSTS_N_INSNS (7)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (23), /* HI */ - COSTS_N_INSNS (39), /* SI */ - COSTS_N_INSNS (39), /* DI */ - COSTS_N_INSNS (39)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 4, /* MOVE_RATIO */ - 1, /* cost for loading QImode using movzbl */ - {1, 1, 1}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {1, 1, 1}, /* cost of storing integer registers */ - 1, /* cost of reg,reg fld/fst */ - {1, 1, 1}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 6, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - - 1, /* cost of moving MMX register */ - {1, 1}, /* cost of loading MMX registers - in SImode and DImode */ - {1, 1}, /* cost of storing MMX registers - in SImode and DImode */ - 1, /* cost of moving SSE register */ - {1, 1, 1}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {1, 1, 1}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 1, /* MMX or SSE register to integer */ - 64, /* size of l1 cache. */ - 128, /* size of l2 cache. */ - 32, /* size of prefetch block */ - 1, /* number of parallel prefetches */ - 1, /* Branch cost */ - COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (11), /* cost of FMUL instruction. */ - COSTS_N_INSNS (47), /* cost of FDIV instruction. */ - COSTS_N_INSNS (1), /* cost of FABS instruction. */ - COSTS_N_INSNS (1), /* cost of FCHS instruction. */ - COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs k6_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (2), /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (3), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (3), /* DI */ - COSTS_N_INSNS (3)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (18), /* HI */ - COSTS_N_INSNS (18), /* SI */ - COSTS_N_INSNS (18), /* DI */ - COSTS_N_INSNS (18)}, /* other */ - COSTS_N_INSNS (2), /* cost of movsx */ - COSTS_N_INSNS (2), /* cost of movzx */ - 8, /* "large" insn */ - 4, /* MOVE_RATIO */ - 3, /* cost for loading QImode using movzbl */ - {4, 5, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 3, 2}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {6, 6, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 4}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {2, 2}, /* cost of loading MMX registers - in SImode and DImode */ - {2, 2}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {2, 2, 8}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {2, 2, 8}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 6, /* MMX or SSE register to integer */ - 32, /* size of l1 cache. */ - 32, /* size of l2 cache. Some models - have integrated l2 cache, but - optimizing for k6 is not important - enough to worry about that. */ - 32, /* size of prefetch block */ - 1, /* number of parallel prefetches */ - 1, /* Branch cost */ - COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (2), /* cost of FMUL instruction. */ - COSTS_N_INSNS (56), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ - {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs athlon_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (2), /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ - COSTS_N_INSNS (5), /* HI */ - COSTS_N_INSNS (5), /* SI */ - COSTS_N_INSNS (5), /* DI */ - COSTS_N_INSNS (5)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 9, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {3, 4, 3}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {3, 4, 3}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {4, 4, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {4, 4}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {4, 4, 6}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 4, 5}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 5, /* MMX or SSE register to integer */ - 64, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 5, /* Branch cost */ - COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (4), /* cost of FMUL instruction. */ - COSTS_N_INSNS (24), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* For some reason, Athlon deals better with REP prefix (relative to loops) - compared to K8. Alignment becomes important after 8 bytes for memcpy and - 128 bytes for memset. */ - {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs k8_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (2), /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (5)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 9, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {3, 4, 3}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {3, 4, 3}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {4, 4, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {4, 3, 6}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 4, 5}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 5, /* MMX or SSE register to integer */ - 64, /* size of l1 cache. */ - 512, /* size of l2 cache. */ - 64, /* size of prefetch block */ - /* New AMD processors never drop prefetches; if they cannot be performed - immediately, they are queued. We set number of simultaneous prefetches - to a large constant to reflect this (it probably is not a good idea not - to limit number of prefetches at all, as their execution also takes some - time). */ - 100, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (4), /* cost of FMUL instruction. */ - COSTS_N_INSNS (19), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - /* K8 has optimized REP instruction for medium sized blocks, but for very small - blocks it is better to use loop. For large blocks, libcall can do - nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, - {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - {{libcall, {{8, loop}, {24, unrolled_loop}, - {2048, rep_prefix_4_byte}, {-1, libcall}}}, - {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - 4, /* scalar_stmt_cost. */ - 2, /* scalar load_cost. */ - 2, /* scalar_store_cost. */ - 5, /* vec_stmt_cost. */ - 0, /* vec_to_scalar_cost. */ - 2, /* scalar_to_vec_cost. */ - 2, /* vec_align_load_cost. */ - 3, /* vec_unalign_load_cost. */ - 3, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 2, /* cond_not_taken_branch_cost. */ -}; - -struct processor_costs amdfam10_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (2), /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (5)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (35), /* HI */ - COSTS_N_INSNS (51), /* SI */ - COSTS_N_INSNS (83), /* DI */ - COSTS_N_INSNS (83)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 9, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {3, 4, 3}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {3, 4, 3}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {4, 4, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {4, 4, 3}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 4, 5}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 3, /* MMX or SSE register to integer */ - /* On K8 - MOVD reg64, xmmreg Double FSTORE 4 - MOVD reg32, xmmreg Double FSTORE 4 - On AMDFAM10 - MOVD reg64, xmmreg Double FADD 3 - 1/1 1/1 - MOVD reg32, xmmreg Double FADD 3 - 1/1 1/1 */ - 64, /* size of l1 cache. */ - 512, /* size of l2 cache. */ - 64, /* size of prefetch block */ - /* New AMD processors never drop prefetches; if they cannot be performed - immediately, they are queued. We set number of simultaneous prefetches - to a large constant to reflect this (it probably is not a good idea not - to limit number of prefetches at all, as their execution also takes some - time). */ - 100, /* number of parallel prefetches */ - 2, /* Branch cost */ - COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (4), /* cost of FMUL instruction. */ - COSTS_N_INSNS (19), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ - - /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for - very small blocks it is better to use loop. For large blocks, libcall can - do nontemporary accesses and beat inline considerably. */ - {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, - {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - {{libcall, {{8, loop}, {24, unrolled_loop}, - {2048, rep_prefix_4_byte}, {-1, libcall}}}, - {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - 4, /* scalar_stmt_cost. */ - 2, /* scalar load_cost. */ - 2, /* scalar_store_cost. */ - 6, /* vec_stmt_cost. */ - 0, /* vec_to_scalar_cost. */ - 2, /* scalar_to_vec_cost. */ - 2, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 2, /* vec_store_cost. */ - 2, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs pentium4_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (3), /* cost of a lea instruction */ - COSTS_N_INSNS (4), /* variable shift costs */ - COSTS_N_INSNS (4), /* constant shift costs */ - {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ - COSTS_N_INSNS (15), /* HI */ - COSTS_N_INSNS (15), /* SI */ - COSTS_N_INSNS (15), /* DI */ - COSTS_N_INSNS (15)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (56), /* HI */ - COSTS_N_INSNS (56), /* SI */ - COSTS_N_INSNS (56), /* DI */ - COSTS_N_INSNS (56)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 16, /* "large" insn */ - 6, /* MOVE_RATIO */ - 2, /* cost for loading QImode using movzbl */ - {4, 5, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {2, 3, 2}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {2, 2, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 6}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {2, 2}, /* cost of loading MMX registers - in SImode and DImode */ - {2, 2}, /* cost of storing MMX registers - in SImode and DImode */ - 12, /* cost of moving SSE register */ - {12, 12, 12}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {2, 2, 8}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 10, /* MMX or SSE register to integer */ - 8, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 2, /* Branch cost */ - COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (7), /* cost of FMUL instruction. */ - COSTS_N_INSNS (43), /* cost of FDIV instruction. */ - COSTS_N_INSNS (2), /* cost of FABS instruction. */ - COSTS_N_INSNS (2), /* cost of FCHS instruction. */ - COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ - {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte}, - {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs nocona_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1), /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ - COSTS_N_INSNS (10), /* HI */ - COSTS_N_INSNS (10), /* SI */ - COSTS_N_INSNS (10), /* DI */ - COSTS_N_INSNS (10)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (66), /* HI */ - COSTS_N_INSNS (66), /* SI */ - COSTS_N_INSNS (66), /* DI */ - COSTS_N_INSNS (66)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 16, /* "large" insn */ - 17, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {4, 4, 4}, /* cost of storing integer registers */ - 3, /* cost of reg,reg fld/fst */ - {12, 12, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 4}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 6, /* cost of moving MMX register */ - {12, 12}, /* cost of loading MMX registers - in SImode and DImode */ - {12, 12}, /* cost of storing MMX registers - in SImode and DImode */ - 6, /* cost of moving SSE register */ - {12, 12, 12}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {12, 12, 12}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 8, /* MMX or SSE register to integer */ - 8, /* size of l1 cache. */ - 1024, /* size of l2 cache. */ - 128, /* size of prefetch block */ - 8, /* number of parallel prefetches */ - 1, /* Branch cost */ - COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (40), /* cost of FDIV instruction. */ - COSTS_N_INSNS (3), /* cost of FABS instruction. */ - COSTS_N_INSNS (3), /* cost of FCHS instruction. */ - COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ - {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, - {libcall, {{32, loop}, {20000, rep_prefix_8_byte}, - {100000, unrolled_loop}, {-1, libcall}}}}, - {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte}, - {-1, libcall}}}, - {libcall, {{24, loop}, {64, unrolled_loop}, - {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs core2_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (3), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (3), /* DI */ - COSTS_N_INSNS (3)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (22), /* HI */ - COSTS_N_INSNS (22), /* SI */ - COSTS_N_INSNS (22), /* DI */ - COSTS_N_INSNS (22)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 16, /* MOVE_RATIO */ - 2, /* cost for loading QImode using movzbl */ - {6, 6, 6}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {4, 4, 4}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 6}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {4, 4, 4}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {4, 4}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {6, 6, 6}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {4, 4, 4}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 2, /* MMX or SSE register to integer */ - 32, /* size of l1 cache. */ - 2048, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 8, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (5), /* cost of FMUL instruction. */ - COSTS_N_INSNS (32), /* cost of FDIV instruction. */ - COSTS_N_INSNS (1), /* cost of FABS instruction. */ - COSTS_N_INSNS (1), /* cost of FCHS instruction. */ - COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ - {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, - {libcall, {{32, loop}, {64, rep_prefix_4_byte}, - {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - {{libcall, {{8, loop}, {15, unrolled_loop}, - {2048, rep_prefix_4_byte}, {-1, libcall}}}, - {libcall, {{24, loop}, {32, unrolled_loop}, - {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -static const -struct processor_costs atom_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (2)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 17, /* MOVE_RATIO */ - 2, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {4, 4, 4}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {12, 12, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {8, 8}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {8, 8, 8}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {8, 8, 8}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 5, /* MMX or SSE register to integer */ - 32, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (20), /* cost of FDIV instruction. */ - COSTS_N_INSNS (8), /* cost of FABS instruction. */ - COSTS_N_INSNS (8), /* cost of FCHS instruction. */ - COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, - {libcall, {{32, loop}, {64, rep_prefix_4_byte}, - {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - {{libcall, {{8, loop}, {15, unrolled_loop}, - {2048, rep_prefix_4_byte}, {-1, libcall}}}, - {libcall, {{24, loop}, {32, unrolled_loop}, - {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -/* Generic64 should produce code tuned for Nocona and K8. */ -static const -struct processor_costs generic64_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - /* On all chips taken into consideration lea is 2 cycles and more. With - this cost however our current implementation of synth_mult results in - use of unnecessary temporary registers causing regression on several - SPECfp benchmarks. */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (2)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 17, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {4, 4, 4}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {12, 12, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {8, 8}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {8, 8, 8}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {8, 8, 8}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 5, /* MMX or SSE register to integer */ - 32, /* size of l1 cache. */ - 512, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value - is increased to perhaps more appropriate value of 5. */ - 3, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (20), /* cost of FDIV instruction. */ - COSTS_N_INSNS (8), /* cost of FABS instruction. */ - COSTS_N_INSNS (8), /* cost of FCHS instruction. */ - COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {DUMMY_STRINGOP_ALGS, - {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - {DUMMY_STRINGOP_ALGS, - {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */ -static const -struct processor_costs generic32_cost = { - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (4), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (2)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 17, /* MOVE_RATIO */ - 4, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {4, 4, 4}, /* cost of storing integer registers */ - 4, /* cost of reg,reg fld/fst */ - {12, 12, 12}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 8}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {8, 8}, /* cost of loading MMX registers - in SImode and DImode */ - {8, 8}, /* cost of storing MMX registers - in SImode and DImode */ - 2, /* cost of moving SSE register */ - {8, 8, 8}, /* cost of loading SSE registers - in SImode, DImode and TImode */ - {8, 8, 8}, /* cost of storing SSE registers - in SImode, DImode and TImode */ - 5, /* MMX or SSE register to integer */ - 32, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (20), /* cost of FDIV instruction. */ - COSTS_N_INSNS (8), /* cost of FABS instruction. */ - COSTS_N_INSNS (8), /* cost of FCHS instruction. */ - COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}}, - DUMMY_STRINGOP_ALGS}, - 1, /* scalar_stmt_cost. */ - 1, /* scalar load_cost. */ - 1, /* scalar_store_cost. */ - 1, /* vec_stmt_cost. */ - 1, /* vec_to_scalar_cost. */ - 1, /* scalar_to_vec_cost. */ - 1, /* vec_align_load_cost. */ - 2, /* vec_unalign_load_cost. */ - 1, /* vec_store_cost. */ - 3, /* cond_taken_branch_cost. */ - 1, /* cond_not_taken_branch_cost. */ -}; - -const struct processor_costs *ix86_cost = &pentium_cost; - -/* Processor feature/optimization bitmasks. */ -#define m_386 (1<<PROCESSOR_I386) -#define m_486 (1<<PROCESSOR_I486) -#define m_PENT (1<<PROCESSOR_PENTIUM) -#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) -#define m_PENT4 (1<<PROCESSOR_PENTIUM4) -#define m_NOCONA (1<<PROCESSOR_NOCONA) -#define m_CORE2 (1<<PROCESSOR_CORE2) -#define m_ATOM (1<<PROCESSOR_ATOM) - -#define m_GEODE (1<<PROCESSOR_GEODE) -#define m_K6 (1<<PROCESSOR_K6) -#define m_K6_GEODE (m_K6 | m_GEODE) -#define m_K8 (1<<PROCESSOR_K8) -#define m_ATHLON (1<<PROCESSOR_ATHLON) -#define m_ATHLON_K8 (m_K8 | m_ATHLON) -#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10) -#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10) - -#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) -#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) - -/* Generic instruction choice should be common subset of supported CPUs - (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */ -#define m_GENERIC (m_GENERIC32 | m_GENERIC64) - -/* Feature tests against the various tunings. */ -unsigned char ix86_tune_features[X86_TUNE_LAST]; - -/* Feature tests against the various tunings used to create ix86_tune_features - based on the processor mask. */ -static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { - /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results - negatively, so enabling for Generic64 seems like good code size - tradeoff. We can't enable it for 32bit generic because it does not - work well with PPro base chips. */ - m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64, - - /* X86_TUNE_PUSH_MEMORY */ - m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 - | m_NOCONA | m_CORE2 | m_GENERIC, - - /* X86_TUNE_ZERO_EXTEND_WITH_AND */ - m_486 | m_PENT, - - /* X86_TUNE_UNROLL_STRLEN */ - m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6 - | m_CORE2 | m_GENERIC, - - /* X86_TUNE_DEEP_BRANCH_PREDICTION */ - m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC, - - /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based - on simulation result. But after P4 was made, no performance benefit - was observed with branch hints. It also increases the code size. - As a result, icc never generates branch hints. */ - 0, - - /* X86_TUNE_DOUBLE_WITH_ADD */ - ~m_386, - - /* X86_TUNE_USE_SAHF */ - m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4 - | m_NOCONA | m_CORE2 | m_GENERIC, - - /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid - partial dependencies. */ - m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA - | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */, - - /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial - register stalls on Generic32 compilation setting as well. However - in current implementation the partial register stalls are not eliminated - very well - they can be introduced via subregs synthesized by combine - and can happen in caller/callee saving sequences. Because this option - pays back little on PPro based chips and is in conflict with partial reg - dependencies used by Athlon/P4 based chips, it is better to leave it off - for generic32 for now. */ - m_PPRO, - - /* X86_TUNE_PARTIAL_FLAG_REG_STALL */ - m_CORE2 | m_GENERIC, - - /* X86_TUNE_USE_HIMODE_FIOP */ - m_386 | m_486 | m_K6_GEODE, - - /* X86_TUNE_USE_SIMODE_FIOP */ - ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC), - - /* X86_TUNE_USE_MOV0 */ - m_K6, - - /* X86_TUNE_USE_CLTD */ - ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC), - - /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ - m_PENT4, - - /* X86_TUNE_SPLIT_LONG_MOVES */ - m_PPRO, - - /* X86_TUNE_READ_MODIFY_WRITE */ - ~m_PENT, - - /* X86_TUNE_READ_MODIFY */ - ~(m_PENT | m_PPRO), - - /* X86_TUNE_PROMOTE_QIMODE */ - m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE - | m_CORE2 | m_GENERIC /* | m_PENT4 ? */, - - /* X86_TUNE_FAST_PREFIX */ - ~(m_PENT | m_486 | m_386), - - /* X86_TUNE_SINGLE_STRINGOP */ - m_386 | m_PENT4 | m_NOCONA, - - /* X86_TUNE_QIMODE_MATH */ - ~0, - - /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial - register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option - might be considered for Generic32 if our scheme for avoiding partial - stalls was more effective. */ - ~m_PPRO, - - /* X86_TUNE_PROMOTE_QI_REGS */ - 0, - - /* X86_TUNE_PROMOTE_HI_REGS */ - m_PPRO, - - /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */ - m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA - | m_CORE2 | m_GENERIC, - - /* X86_TUNE_ADD_ESP_8 */ - m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386 - | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, - - /* X86_TUNE_SUB_ESP_4 */ - m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 - | m_GENERIC, - - /* X86_TUNE_SUB_ESP_8 */ - m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486 - | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, - - /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred - for DFmode copies */ - ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 - | m_GENERIC | m_GEODE), - - /* X86_TUNE_PARTIAL_REG_DEPENDENCY */ - m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, - - /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a - conflict here in between PPro/Pentium4 based chips that thread 128bit - SSE registers as single units versus K8 based chips that divide SSE - registers to two 64bit halves. This knob promotes all store destinations - to be 128bit to allow register renaming on 128bit SSE units, but usually - results in one extra microop on 64bit SSE units. Experimental results - shows that disabling this option on P4 brings over 20% SPECfp regression, - while enabling it on K8 brings roughly 2.4% regression that can be partly - masked by careful scheduling of moves. */ - m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC - | m_AMDFAM10, - - /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */ - m_AMDFAM10, - - /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies - are resolved on SSE register parts instead of whole registers, so we may - maintain just lower part of scalar values in proper format leaving the - upper part undefined. */ - m_ATHLON_K8, - - /* X86_TUNE_SSE_TYPELESS_STORES */ - m_AMD_MULTIPLE, - - /* X86_TUNE_SSE_LOAD0_BY_PXOR */ - m_PPRO | m_PENT4 | m_NOCONA, - - /* X86_TUNE_MEMORY_MISMATCH_STALL */ - m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, - - /* X86_TUNE_PROLOGUE_USING_MOVE */ - m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC, - - /* X86_TUNE_EPILOGUE_USING_MOVE */ - m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC, - - /* X86_TUNE_SHIFT1 */ - ~m_486, - - /* X86_TUNE_USE_FFREEP */ - m_AMD_MULTIPLE, - - /* X86_TUNE_INTER_UNIT_MOVES */ - ~(m_AMD_MULTIPLE | m_GENERIC), - - /* X86_TUNE_INTER_UNIT_CONVERSIONS */ - ~(m_AMDFAM10), - - /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more - than 4 branch instructions in the 16 byte window. */ - m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 - | m_GENERIC, - - /* X86_TUNE_SCHEDULE */ - m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2 - | m_GENERIC, - - /* X86_TUNE_USE_BT */ - m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC, - - /* X86_TUNE_USE_INCDEC */ - ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM), - - /* X86_TUNE_PAD_RETURNS */ - m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, - - /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */ - m_ATOM, - - /* X86_TUNE_EXT_80387_CONSTANTS */ - m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO - | m_CORE2 | m_GENERIC, - - /* X86_TUNE_SHORTEN_X87_SSE */ - ~m_K8, - - /* X86_TUNE_AVOID_VECTOR_DECODE */ - m_K8 | m_GENERIC64, - - /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode - and SImode multiply, but 386 and 486 do HImode multiply faster. */ - ~(m_386 | m_486), - - /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is - vector path on AMD machines. */ - m_K8 | m_GENERIC64 | m_AMDFAM10, - - /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD - machines. */ - m_K8 | m_GENERIC64 | m_AMDFAM10, - - /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR - than a MOV. */ - m_PENT, - - /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is, - but one byte longer. */ - m_PENT, - - /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory - operand that cannot be represented using a modRM byte. The XOR - replacement is long decoded, so this split helps here as well. */ - m_K6, - - /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion - from FP to FP. */ - m_AMDFAM10 | m_GENERIC, - - /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion - from integer to FP. */ - m_AMDFAM10, - - /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction - with a subsequent conditional jump instruction into a single - compare-and-branch uop. */ - m_CORE2, - - /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag - will impact LEA instruction selection. */ - m_ATOM, -}; - -/* Feature tests against the various architecture variations. */ -unsigned char ix86_arch_features[X86_ARCH_LAST]; - -/* Feature tests against the various architecture variations, used to create - ix86_arch_features based on the processor mask. */ -static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { - /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */ - ~(m_386 | m_486 | m_PENT | m_K6), - - /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */ - ~m_386, - - /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */ - ~(m_386 | m_486), - - /* X86_ARCH_XADD: Exchange and add was added for 80486. */ - ~m_386, - - /* X86_ARCH_BSWAP: Byteswap was added for 80486. */ - ~m_386, -}; - -static const unsigned int x86_accumulate_outgoing_args - = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 - | m_GENERIC; - -static const unsigned int x86_arch_always_fancy_math_387 - = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 - | m_NOCONA | m_CORE2 | m_GENERIC; - -static enum stringop_alg stringop_alg = no_stringop; - -/* In case the average insn count for single function invocation is - lower than this constant, emit fast (but longer) prologue and - epilogue code. */ -#define FAST_PROLOGUE_INSN_COUNT 20 - -/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ -static const char *const qi_reg_name[] = QI_REGISTER_NAMES; -static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; -static const char *const hi_reg_name[] = HI_REGISTER_NAMES; - -/* Array of the smallest class containing reg number REGNO, indexed by - REGNO. Used by REGNO_REG_CLASS in i386.h. */ - -enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = -{ - /* ax, dx, cx, bx */ - AREG, DREG, CREG, BREG, - /* si, di, bp, sp */ - SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, - /* FP registers */ - FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, - FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, - /* arg pointer */ - NON_Q_REGS, - /* flags, fpsr, fpcr, frame */ - NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, - /* SSE registers */ - SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, - SSE_REGS, SSE_REGS, - /* MMX registers */ - MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, - MMX_REGS, MMX_REGS, - /* REX registers */ - NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, - NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, - /* SSE REX registers */ - SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, - SSE_REGS, SSE_REGS, -}; - -/* The "default" register map used in 32bit mode. */ - -int const dbx_register_map[FIRST_PSEUDO_REGISTER] = -{ - 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ - 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ - -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ - 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ - 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ - -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ - -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ -}; - -/* The "default" register map used in 64bit mode. */ - -int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ - 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ - -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ - 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ - 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ - 8,9,10,11,12,13,14,15, /* extended integer registers */ - 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ -}; - -/* Define the register numbers to be used in Dwarf debugging information. - The SVR4 reference port C compiler uses the following register numbers - in its Dwarf output code: - 0 for %eax (gcc regno = 0) - 1 for %ecx (gcc regno = 2) - 2 for %edx (gcc regno = 1) - 3 for %ebx (gcc regno = 3) - 4 for %esp (gcc regno = 7) - 5 for %ebp (gcc regno = 6) - 6 for %esi (gcc regno = 4) - 7 for %edi (gcc regno = 5) - The following three DWARF register numbers are never generated by - the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 - believes these numbers have these meanings. - 8 for %eip (no gcc equivalent) - 9 for %eflags (gcc regno = 17) - 10 for %trapno (no gcc equivalent) - It is not at all clear how we should number the FP stack registers - for the x86 architecture. If the version of SDB on x86/svr4 were - a bit less brain dead with respect to floating-point then we would - have a precedent to follow with respect to DWARF register numbers - for x86 FP registers, but the SDB on x86/svr4 is so completely - broken with respect to FP registers that it is hardly worth thinking - of it as something to strive for compatibility with. - The version of x86/svr4 SDB I have at the moment does (partially) - seem to believe that DWARF register number 11 is associated with - the x86 register %st(0), but that's about all. Higher DWARF - register numbers don't seem to be associated with anything in - particular, and even for DWARF regno 11, SDB only seems to under- - stand that it should say that a variable lives in %st(0) (when - asked via an `=' command) if we said it was in DWARF regno 11, - but SDB still prints garbage when asked for the value of the - variable in question (via a `/' command). - (Also note that the labels SDB prints for various FP stack regs - when doing an `x' command are all wrong.) - Note that these problems generally don't affect the native SVR4 - C compiler because it doesn't allow the use of -O with -g and - because when it is *not* optimizing, it allocates a memory - location for each floating-point variable, and the memory - location is what gets described in the DWARF AT_location - attribute for the variable in question. - Regardless of the severe mental illness of the x86/svr4 SDB, we - do something sensible here and we use the following DWARF - register numbers. Note that these are all stack-top-relative - numbers. - 11 for %st(0) (gcc regno = 8) - 12 for %st(1) (gcc regno = 9) - 13 for %st(2) (gcc regno = 10) - 14 for %st(3) (gcc regno = 11) - 15 for %st(4) (gcc regno = 12) - 16 for %st(5) (gcc regno = 13) - 17 for %st(6) (gcc regno = 14) - 18 for %st(7) (gcc regno = 15) -*/ -int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = -{ - 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ - 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ - -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ - 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ - 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ - -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ - -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ -}; - -/* Test and compare insns in i386.md store the information needed to - generate branch and scc insns here. */ - -rtx ix86_compare_op0 = NULL_RTX; -rtx ix86_compare_op1 = NULL_RTX; -rtx ix86_compare_emitted = NULL_RTX; - -/* Define parameter passing and return registers. */ - -static int const x86_64_int_parameter_registers[6] = -{ - DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG -}; - -static int const x86_64_ms_abi_int_parameter_registers[4] = -{ - CX_REG, DX_REG, R8_REG, R9_REG -}; - -static int const x86_64_int_return_registers[4] = -{ - AX_REG, DX_REG, DI_REG, SI_REG -}; - -/* Define the structure for the machine field in struct function. */ - -struct stack_local_entry GTY(()) -{ - unsigned short mode; - unsigned short n; - rtx rtl; - struct stack_local_entry *next; -}; - -/* Structure describing stack frame layout. - Stack grows downward: - - [arguments] - <- ARG_POINTER - saved pc - - saved frame pointer if frame_pointer_needed - <- HARD_FRAME_POINTER - [saved regs] - - [padding0] - - [saved SSE regs] - - [padding1] \ - ) - [va_arg registers] ( - > to_allocate <- FRAME_POINTER - [frame] ( - ) - [padding2] / - */ -struct ix86_frame -{ - int padding0; - int nsseregs; - int nregs; - int padding1; - int va_arg_size; - HOST_WIDE_INT frame; - int padding2; - int outgoing_arguments_size; - int red_zone_size; - - HOST_WIDE_INT to_allocate; - /* The offsets relative to ARG_POINTER. */ - HOST_WIDE_INT frame_pointer_offset; - HOST_WIDE_INT hard_frame_pointer_offset; - HOST_WIDE_INT stack_pointer_offset; - - /* When save_regs_using_mov is set, emit prologue using - move instead of push instructions. */ - bool save_regs_using_mov; -}; - -/* Code model option. */ -enum cmodel ix86_cmodel; -/* Asm dialect. */ -enum asm_dialect ix86_asm_dialect = ASM_ATT; -/* TLS dialects. */ -enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; - -/* Which unit we are generating floating point math for. */ -enum fpmath_unit ix86_fpmath; - -/* Which cpu are we scheduling for. */ -enum attr_cpu ix86_schedule; - -/* Which cpu are we optimizing for. */ -enum processor_type ix86_tune; - -/* Which instruction set architecture to use. */ -enum processor_type ix86_arch; - -/* true if sse prefetch instruction is not NOOP. */ -int x86_prefetch_sse; - -/* ix86_regparm_string as a number */ -static int ix86_regparm; - -/* -mstackrealign option */ -extern int ix86_force_align_arg_pointer; -static const char ix86_force_align_arg_pointer_string[] - = "force_align_arg_pointer"; - -/* Stack protector option. */ -enum stack_protector_guard ix86_stack_protector_guard; - -static rtx (*ix86_gen_leave) (void); -static rtx (*ix86_gen_pop1) (rtx); -static rtx (*ix86_gen_add3) (rtx, rtx, rtx); -static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); -static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx); -static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); -static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); -static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); - -/* Preferred alignment for stack boundary in bits. */ -unsigned int ix86_preferred_stack_boundary; - -/* Alignment for incoming stack boundary in bits specified at - command line. */ -static unsigned int ix86_user_incoming_stack_boundary; - -/* Default alignment for incoming stack boundary in bits. */ -static unsigned int ix86_default_incoming_stack_boundary; - -/* Alignment for incoming stack boundary in bits. */ -unsigned int ix86_incoming_stack_boundary; - -/* The abi used by target. */ -enum calling_abi ix86_abi; - -/* Values 1-5: see jump.c */ -int ix86_branch_cost; - -/* Calling abi specific va_list type nodes. */ -static GTY(()) tree sysv_va_list_type_node; -static GTY(()) tree ms_va_list_type_node; - -/* Variables which are this size or smaller are put in the data/bss - or ldata/lbss sections. */ - -int ix86_section_threshold = 65536; - -/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ -char internal_label_prefix[16]; -int internal_label_prefix_len; - -/* Fence to use after loop using movnt. */ -tree x86_mfence; - -/* Register class used for passing given 64bit part of the argument. - These represent classes as documented by the PS ABI, with the exception - of SSESF, SSEDF classes, that are basically SSE class, just gcc will - use SF or DFmode move instead of DImode to avoid reformatting penalties. - - Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves - whenever possible (upper half does contain padding). */ -enum x86_64_reg_class - { - X86_64_NO_CLASS, - X86_64_INTEGER_CLASS, - X86_64_INTEGERSI_CLASS, - X86_64_SSE_CLASS, - X86_64_SSESF_CLASS, - X86_64_SSEDF_CLASS, - X86_64_SSEUP_CLASS, - X86_64_X87_CLASS, - X86_64_X87UP_CLASS, - X86_64_COMPLEX_X87_CLASS, - X86_64_MEMORY_CLASS - }; - -#define MAX_CLASSES 4 - -/* Table of constants used by fldpi, fldln2, etc.... */ -static REAL_VALUE_TYPE ext_80387_constants_table [5]; -static bool ext_80387_constants_init = 0; - - -static struct machine_function * ix86_init_machine_status (void); -static rtx ix86_function_value (const_tree, const_tree, bool); -static int ix86_function_regparm (const_tree, const_tree); -static void ix86_compute_frame_layout (struct ix86_frame *); -static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, - rtx, rtx, int); -static void ix86_add_new_builtins (int); - -enum ix86_function_specific_strings -{ - IX86_FUNCTION_SPECIFIC_ARCH, - IX86_FUNCTION_SPECIFIC_TUNE, - IX86_FUNCTION_SPECIFIC_FPMATH, - IX86_FUNCTION_SPECIFIC_MAX -}; - -static char *ix86_target_string (int, int, const char *, const char *, - const char *, bool); -static void ix86_debug_options (void) ATTRIBUTE_UNUSED; -static void ix86_function_specific_save (struct cl_target_option *); -static void ix86_function_specific_restore (struct cl_target_option *); -static void ix86_function_specific_print (FILE *, int, - struct cl_target_option *); -static bool ix86_valid_target_attribute_p (tree, tree, tree, int); -static bool ix86_valid_target_attribute_inner_p (tree, char *[]); -static bool ix86_can_inline_p (tree, tree); -static void ix86_set_current_function (tree); -static unsigned int ix86_minimum_incoming_stack_boundary (bool); - -static enum calling_abi ix86_function_abi (const_tree); - - -/* The svr4 ABI for the i386 says that records and unions are returned - in memory. */ -#ifndef DEFAULT_PCC_STRUCT_RETURN -#define DEFAULT_PCC_STRUCT_RETURN 1 -#endif - -/* Whether -mtune= or -march= were specified */ -static int ix86_tune_defaulted; -static int ix86_arch_specified; - -/* Bit flags that specify the ISA we are compiling for. */ -int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; - -/* A mask of ix86_isa_flags that includes bit X if X - was set or cleared on the command line. */ -static int ix86_isa_flags_explicit; - -/* Define a set of ISAs which are available when a given ISA is - enabled. MMX and SSE ISAs are handled separately. */ - -#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX -#define OPTION_MASK_ISA_3DNOW_SET \ - (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET) - -#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE -#define OPTION_MASK_ISA_SSE2_SET \ - (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET) -#define OPTION_MASK_ISA_SSE3_SET \ - (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET) -#define OPTION_MASK_ISA_SSSE3_SET \ - (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET) -#define OPTION_MASK_ISA_SSE4_1_SET \ - (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET) -#define OPTION_MASK_ISA_SSE4_2_SET \ - (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET) -#define OPTION_MASK_ISA_AVX_SET \ - (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET) -#define OPTION_MASK_ISA_FMA_SET \ - (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET) - -/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same - as -msse4.2. */ -#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET - -#define OPTION_MASK_ISA_SSE4A_SET \ - (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET) -#define OPTION_MASK_ISA_SSE5_SET \ - (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET) -#define OPTION_MASK_ISA_LWP_SET \ - OPTION_MASK_ISA_LWP - -/* AES and PCLMUL need SSE2 because they use xmm registers */ -#define OPTION_MASK_ISA_AES_SET \ - (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET) -#define OPTION_MASK_ISA_PCLMUL_SET \ - (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET) - -#define OPTION_MASK_ISA_ABM_SET \ - (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) - -#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT -#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 -#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF -#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE - -/* Define a set of ISAs which aren't available when a given ISA is - disabled. MMX and SSE ISAs are handled separately. */ - -#define OPTION_MASK_ISA_MMX_UNSET \ - (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET) -#define OPTION_MASK_ISA_3DNOW_UNSET \ - (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET) -#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A - -#define OPTION_MASK_ISA_SSE_UNSET \ - (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET) -#define OPTION_MASK_ISA_SSE2_UNSET \ - (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET) -#define OPTION_MASK_ISA_SSE3_UNSET \ - (OPTION_MASK_ISA_SSE3 \ - | OPTION_MASK_ISA_SSSE3_UNSET \ - | OPTION_MASK_ISA_SSE4A_UNSET ) -#define OPTION_MASK_ISA_SSSE3_UNSET \ - (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET) -#define OPTION_MASK_ISA_SSE4_1_UNSET \ - (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET) -#define OPTION_MASK_ISA_SSE4_2_UNSET \ - (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET ) -#define OPTION_MASK_ISA_AVX_UNSET \ - (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET) -#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA - -/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same - as -mno-sse4.1. */ -#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET - -#define OPTION_MASK_ISA_SSE4A_UNSET \ - (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET) -#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5 -#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP -#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL -#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM -#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT -#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 -#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF -#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE - -/* Vectorization library interface and handlers. */ -tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; -static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); -static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); - -/* Processor target table, indexed by processor number */ -struct ptt -{ - const struct processor_costs *cost; /* Processor costs */ - const int align_loop; /* Default alignments. */ - const int align_loop_max_skip; - const int align_jump; - const int align_jump_max_skip; - const int align_func; -}; - -static const struct ptt processor_target_table[PROCESSOR_max] = -{ - {&i386_cost, 4, 3, 4, 3, 4}, - {&i486_cost, 16, 15, 16, 15, 16}, - {&pentium_cost, 16, 7, 16, 7, 16}, - {&pentiumpro_cost, 16, 15, 16, 10, 16}, - {&geode_cost, 0, 0, 0, 0, 0}, - {&k6_cost, 32, 7, 32, 7, 32}, - {&athlon_cost, 16, 7, 16, 7, 16}, - {&pentium4_cost, 0, 0, 0, 0, 0}, - {&k8_cost, 16, 7, 16, 7, 16}, - {&nocona_cost, 0, 0, 0, 0, 0}, - {&core2_cost, 16, 10, 16, 10, 16}, - {&generic32_cost, 16, 7, 16, 7, 16}, - {&generic64_cost, 16, 10, 16, 10, 16}, - {&amdfam10_cost, 32, 24, 32, 7, 32}, - {&atom_cost, 16, 7, 16, 7, 16} -}; - -static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = -{ - "generic", - "i386", - "i486", - "pentium", - "pentium-mmx", - "pentiumpro", - "pentium2", - "pentium3", - "pentium4", - "pentium-m", - "prescott", - "nocona", - "core2", - "atom", - "geode", - "k6", - "k6-2", - "k6-3", - "athlon", - "athlon-4", - "k8", - "amdfam10" -}; - -/* Implement TARGET_HANDLE_OPTION. */ - -static bool -ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) -{ - switch (code) - { - case OPT_mmmx: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET; - } - return true; - - case OPT_m3dnow: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET; - } - return true; - - case OPT_m3dnowa: - return false; - - case OPT_msse: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET; - } - return true; - - case OPT_msse2: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET; - } - return true; - - case OPT_msse3: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET; - } - return true; - - case OPT_mssse3: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET; - } - return true; - - case OPT_msse4_1: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET; - } - return true; - - case OPT_msse4_2: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET; - } - return true; - - case OPT_mavx: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET; - } - return true; - - case OPT_mfma: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET; - } - return true; - - case OPT_msse4: - ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; - return true; - - case OPT_mno_sse4: - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; - return true; - - case OPT_msse4a: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET; - } - return true; - - case OPT_msse5: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET; - } - return true; - - case OPT_mlwp: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET; - } - return true; - - case OPT_mabm: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET; - } - return true; - - case OPT_mpopcnt: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET; - } - return true; - - case OPT_msahf: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET; - } - return true; - - case OPT_mcx16: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; - } - return true; - - case OPT_mmovbe: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET; - } - return true; - - case OPT_maes: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_AES_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET; - } - return true; - - case OPT_mpclmul: - if (value) - { - ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET; - } - else - { - ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET; - ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET; - } - return true; - - default: - return true; - } -} - -/* Return a string the documents the current -m options. The caller is - responsible for freeing the string. */ - -static char * -ix86_target_string (int isa, int flags, const char *arch, const char *tune, - const char *fpmath, bool add_nl_p) -{ - struct ix86_target_opts - { - const char *option; /* option string */ - int mask; /* isa mask options */ - }; - - /* This table is ordered so that options like -msse5 or -msse4.2 that imply - preceding options while match those first. */ - static struct ix86_target_opts isa_opts[] = - { - { "-m64", OPTION_MASK_ISA_64BIT }, - { "-msse5", OPTION_MASK_ISA_SSE5 }, - { "-mlwp", OPTION_MASK_ISA_LWP }, - { "-msse4a", OPTION_MASK_ISA_SSE4A }, - { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, - { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, - { "-mssse3", OPTION_MASK_ISA_SSSE3 }, - { "-msse3", OPTION_MASK_ISA_SSE3 }, - { "-msse2", OPTION_MASK_ISA_SSE2 }, - { "-msse", OPTION_MASK_ISA_SSE }, - { "-m3dnow", OPTION_MASK_ISA_3DNOW }, - { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, - { "-mmmx", OPTION_MASK_ISA_MMX }, - { "-mabm", OPTION_MASK_ISA_ABM }, - { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, - { "-mmovbe", OPTION_MASK_ISA_MOVBE }, - { "-maes", OPTION_MASK_ISA_AES }, - { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, - }; - - /* Flag options. */ - static struct ix86_target_opts flag_opts[] = - { - { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, - { "-m80387", MASK_80387 }, - { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, - { "-malign-double", MASK_ALIGN_DOUBLE }, - { "-mcld", MASK_CLD }, - { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, - { "-mieee-fp", MASK_IEEE_FP }, - { "-minline-compares", MASK_INLINE_COMPARES }, - { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, - { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, - { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, - { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, - { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, - { "-mno-fused-madd", MASK_NO_FUSED_MADD }, - { "-mno-push-args", MASK_NO_PUSH_ARGS }, - { "-mno-red-zone", MASK_NO_RED_ZONE }, - { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, - { "-mrecip", MASK_RECIP }, - { "-mrtd", MASK_RTD }, - { "-msseregparm", MASK_SSEREGPARM }, - { "-mstack-arg-probe", MASK_STACK_PROBE }, - { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, - }; - - const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; - - char isa_other[40]; - char target_other[40]; - unsigned num = 0; - unsigned i, j; - char *ret; - char *ptr; - size_t len; - size_t line_len; - size_t sep_len; - - memset (opts, '\0', sizeof (opts)); - - /* Add -march= option. */ - if (arch) - { - opts[num][0] = "-march="; - opts[num++][1] = arch; - } - - /* Add -mtune= option. */ - if (tune) - { - opts[num][0] = "-mtune="; - opts[num++][1] = tune; - } - - /* Pick out the options in isa options. */ - for (i = 0; i < ARRAY_SIZE (isa_opts); i++) - { - if ((isa & isa_opts[i].mask) != 0) - { - opts[num++][0] = isa_opts[i].option; - isa &= ~ isa_opts[i].mask; - } - } - - if (isa && add_nl_p) - { - opts[num++][0] = isa_other; - sprintf (isa_other, "(other isa: 0x%x)", isa); - } - - /* Add flag options. */ - for (i = 0; i < ARRAY_SIZE (flag_opts); i++) - { - if ((flags & flag_opts[i].mask) != 0) - { - opts[num++][0] = flag_opts[i].option; - flags &= ~ flag_opts[i].mask; - } - } - - if (flags && add_nl_p) - { - opts[num++][0] = target_other; - sprintf (target_other, "(other flags: 0x%x)", isa); - } - - /* Add -fpmath= option. */ - if (fpmath) - { - opts[num][0] = "-mfpmath="; - opts[num++][1] = fpmath; - } - - /* Any options? */ - if (num == 0) - return NULL; - - gcc_assert (num < ARRAY_SIZE (opts)); - - /* Size the string. */ - len = 0; - sep_len = (add_nl_p) ? 3 : 1; - for (i = 0; i < num; i++) - { - len += sep_len; - for (j = 0; j < 2; j++) - if (opts[i][j]) - len += strlen (opts[i][j]); - } - - /* Build the string. */ - ret = ptr = (char *) xmalloc (len); - line_len = 0; - - for (i = 0; i < num; i++) - { - size_t len2[2]; - - for (j = 0; j < 2; j++) - len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; - - if (i != 0) - { - *ptr++ = ' '; - line_len++; - - if (add_nl_p && line_len + len2[0] + len2[1] > 70) - { - *ptr++ = '\\'; - *ptr++ = '\n'; - line_len = 0; - } - } - - for (j = 0; j < 2; j++) - if (opts[i][j]) - { - memcpy (ptr, opts[i][j], len2[j]); - ptr += len2[j]; - line_len += len2[j]; - } - } - - *ptr = '\0'; - gcc_assert (ret + len >= ptr); - - return ret; -} - -/* Function that is callable from the debugger to print the current - options. */ -void -ix86_debug_options (void) -{ - char *opts = ix86_target_string (ix86_isa_flags, target_flags, - ix86_arch_string, ix86_tune_string, - ix86_fpmath_string, true); - - if (opts) - { - fprintf (stderr, "%s\n\n", opts); - free (opts); - } - else - fprintf (stderr, "<no options>\n\n"); - - return; -} - -/* Sometimes certain combinations of command options do not make - sense on a particular target machine. You can define a macro - `OVERRIDE_OPTIONS' to take account of this. This macro, if - defined, is executed once just after all the command options have - been parsed. - - Don't use this macro to turn on various extra optimizations for - `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ - -void -override_options (bool main_args_p) -{ - int i; - unsigned int ix86_arch_mask, ix86_tune_mask; - const char *prefix; - const char *suffix; - const char *sw; - - /* Comes from final.c -- no real reason to change it. */ -#define MAX_CODE_ALIGN 16 - - enum pta_flags - { - PTA_SSE = 1 << 0, - PTA_SSE2 = 1 << 1, - PTA_SSE3 = 1 << 2, - PTA_MMX = 1 << 3, - PTA_PREFETCH_SSE = 1 << 4, - PTA_3DNOW = 1 << 5, - PTA_3DNOW_A = 1 << 6, - PTA_64BIT = 1 << 7, - PTA_SSSE3 = 1 << 8, - PTA_CX16 = 1 << 9, - PTA_POPCNT = 1 << 10, - PTA_ABM = 1 << 11, - PTA_SSE4A = 1 << 12, - PTA_NO_SAHF = 1 << 13, - PTA_SSE4_1 = 1 << 14, - PTA_SSE4_2 = 1 << 15, - PTA_SSE5 = 1 << 16, - PTA_AES = 1 << 17, - PTA_PCLMUL = 1 << 18, - PTA_AVX = 1 << 19, - PTA_FMA = 1 << 20, - PTA_LWP = 1 << 21, - PTA_MOVBE = 1 << 22 - }; - - static struct pta - { - const char *const name; /* processor name or nickname. */ - const enum processor_type processor; - const enum attr_cpu schedule; - const unsigned /*enum pta_flags*/ flags; - } - const processor_alias_table[] = - { - {"i386", PROCESSOR_I386, CPU_NONE, 0}, - {"i486", PROCESSOR_I486, CPU_NONE, 0}, - {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, - {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, - {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, - {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX}, - {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, - {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, - {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE}, - {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, - {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, - {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX}, - {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, - PTA_MMX | PTA_SSE}, - {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, - PTA_MMX | PTA_SSE}, - {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, - PTA_MMX | PTA_SSE | PTA_SSE2}, - {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE, - PTA_MMX |PTA_SSE | PTA_SSE2}, - {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE, - PTA_MMX | PTA_SSE | PTA_SSE2}, - {"prescott", PROCESSOR_NOCONA, CPU_NONE, - PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3}, - {"nocona", PROCESSOR_NOCONA, CPU_NONE, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 - | PTA_CX16 | PTA_NO_SAHF}, - {"core2", PROCESSOR_CORE2, CPU_CORE2, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 - | PTA_SSSE3 | PTA_CX16}, - {"atom", PROCESSOR_ATOM, CPU_ATOM, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 - | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE}, - {"geode", PROCESSOR_GEODE, CPU_GEODE, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE}, - {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, - {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, - {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, - {"athlon", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, - {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, - {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, - {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, - {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, - {"x86-64", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF}, - {"k8", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, - {"k8-sse3", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, - {"opteron", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, - {"opteron-sse3", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, - {"athlon64", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, - {"athlon64-sse3", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, - {"athlon-fx", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, - {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, - {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, - {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO, - 0 /* flags are only used for -march switch. */ }, - {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64, - PTA_64BIT /* flags are only used for -march switch. */ }, - }; - - int const pta_size = ARRAY_SIZE (processor_alias_table); - - /* Set up prefix/suffix so the error messages refer to either the command - line argument, or the attribute(target). */ - if (main_args_p) - { - prefix = "-m"; - suffix = ""; - sw = "switch"; - } - else - { - prefix = "option(\""; - suffix = "\")"; - sw = "attribute"; - } - -#ifdef SUBTARGET_OVERRIDE_OPTIONS - SUBTARGET_OVERRIDE_OPTIONS; -#endif - -#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS - SUBSUBTARGET_OVERRIDE_OPTIONS; -#endif - - /* -fPIC is the default for x86_64. */ - if (TARGET_MACHO && TARGET_64BIT) - flag_pic = 2; - - /* Set the default values for switches whose default depends on TARGET_64BIT - in case they weren't overwritten by command line options. */ - if (TARGET_64BIT) - { - /* Mach-O doesn't support omitting the frame pointer for now. */ - if (flag_omit_frame_pointer == 2) - flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); - if (flag_asynchronous_unwind_tables == 2) - flag_asynchronous_unwind_tables = 1; - if (flag_pcc_struct_return == 2) - flag_pcc_struct_return = 0; - } - else - { - if (flag_omit_frame_pointer == 2) - flag_omit_frame_pointer = 0; - if (flag_asynchronous_unwind_tables == 2) - flag_asynchronous_unwind_tables = 0; - if (flag_pcc_struct_return == 2) - flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; - } - - /* Need to check -mtune=generic first. */ - if (ix86_tune_string) - { - if (!strcmp (ix86_tune_string, "generic") - || !strcmp (ix86_tune_string, "i686") - /* As special support for cross compilers we read -mtune=native - as -mtune=generic. With native compilers we won't see the - -mtune=native, as it was changed by the driver. */ - || !strcmp (ix86_tune_string, "native")) - { - if (TARGET_64BIT) - ix86_tune_string = "generic64"; - else - ix86_tune_string = "generic32"; - } - /* If this call is for setting the option attribute, allow the - generic32/generic64 that was previously set. */ - else if (!main_args_p - && (!strcmp (ix86_tune_string, "generic32") - || !strcmp (ix86_tune_string, "generic64"))) - ; - else if (!strncmp (ix86_tune_string, "generic", 7)) - error ("bad value (%s) for %stune=%s %s", - ix86_tune_string, prefix, suffix, sw); - } - else - { - if (ix86_arch_string) - ix86_tune_string = ix86_arch_string; - if (!ix86_tune_string) - { - ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT]; - ix86_tune_defaulted = 1; - } - - /* ix86_tune_string is set to ix86_arch_string or defaulted. We - need to use a sensible tune option. */ - if (!strcmp (ix86_tune_string, "generic") - || !strcmp (ix86_tune_string, "x86-64") - || !strcmp (ix86_tune_string, "i686")) - { - if (TARGET_64BIT) - ix86_tune_string = "generic64"; - else - ix86_tune_string = "generic32"; - } - } - if (ix86_stringop_string) - { - if (!strcmp (ix86_stringop_string, "rep_byte")) - stringop_alg = rep_prefix_1_byte; - else if (!strcmp (ix86_stringop_string, "libcall")) - stringop_alg = libcall; - else if (!strcmp (ix86_stringop_string, "rep_4byte")) - stringop_alg = rep_prefix_4_byte; - else if (!strcmp (ix86_stringop_string, "rep_8byte") - && TARGET_64BIT) - /* rep; movq isn't available in 32-bit code. */ - stringop_alg = rep_prefix_8_byte; - else if (!strcmp (ix86_stringop_string, "byte_loop")) - stringop_alg = loop_1_byte; - else if (!strcmp (ix86_stringop_string, "loop")) - stringop_alg = loop; - else if (!strcmp (ix86_stringop_string, "unrolled_loop")) - stringop_alg = unrolled_loop; - else - error ("bad value (%s) for %sstringop-strategy=%s %s", - ix86_stringop_string, prefix, suffix, sw); - } - if (!strcmp (ix86_tune_string, "x86-64")) - warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " - "%stune=k8%s or %stune=generic%s instead as appropriate.", - prefix, suffix, prefix, suffix, prefix, suffix); - - if (!ix86_arch_string) - ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; - else - ix86_arch_specified = 1; - - if (!strcmp (ix86_arch_string, "generic")) - error ("generic CPU can be used only for %stune=%s %s", - prefix, suffix, sw); - if (!strncmp (ix86_arch_string, "generic", 7)) - error ("bad value (%s) for %sarch=%s %s", - ix86_arch_string, prefix, suffix, sw); - - /* Validate -mabi= value. */ - if (ix86_abi_string) - { - if (strcmp (ix86_abi_string, "sysv") == 0) - ix86_abi = SYSV_ABI; - else if (strcmp (ix86_abi_string, "ms") == 0) - ix86_abi = MS_ABI; - else - error ("unknown ABI (%s) for %sabi=%s %s", - ix86_abi_string, prefix, suffix, sw); - } - else - ix86_abi = DEFAULT_ABI; - - if (ix86_cmodel_string != 0) - { - if (!strcmp (ix86_cmodel_string, "small")) - ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; - else if (!strcmp (ix86_cmodel_string, "medium")) - ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM; - else if (!strcmp (ix86_cmodel_string, "large")) - ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE; - else if (flag_pic) - error ("code model %s does not support PIC mode", ix86_cmodel_string); - else if (!strcmp (ix86_cmodel_string, "32")) - ix86_cmodel = CM_32; - else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) - ix86_cmodel = CM_KERNEL; - else - error ("bad value (%s) for %scmodel=%s %s", - ix86_cmodel_string, prefix, suffix, sw); - } - else - { - /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the - use of rip-relative addressing. This eliminates fixups that - would otherwise be needed if this object is to be placed in a - DLL, and is essentially just as efficient as direct addressing. */ - if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) - ix86_cmodel = CM_SMALL_PIC, flag_pic = 1; - else if (TARGET_64BIT) - ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; - else - ix86_cmodel = CM_32; - } - if (ix86_asm_string != 0) - { - if (! TARGET_MACHO - && !strcmp (ix86_asm_string, "intel")) - ix86_asm_dialect = ASM_INTEL; - else if (!strcmp (ix86_asm_string, "att")) - ix86_asm_dialect = ASM_ATT; - else - error ("bad value (%s) for %sasm=%s %s", - ix86_asm_string, prefix, suffix, sw); - } - if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) - error ("code model %qs not supported in the %s bit mode", - ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); - if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) - sorry ("%i-bit mode not compiled in", - (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); - - for (i = 0; i < pta_size; i++) - if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) - { - ix86_schedule = processor_alias_table[i].schedule; - ix86_arch = processor_alias_table[i].processor; - /* Default cpu tuning to the architecture. */ - ix86_tune = ix86_arch; - - if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) - error ("CPU you selected does not support x86-64 " - "instruction set"); - - if (processor_alias_table[i].flags & PTA_MMX - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) - ix86_isa_flags |= OPTION_MASK_ISA_MMX; - if (processor_alias_table[i].flags & PTA_3DNOW - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) - ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; - if (processor_alias_table[i].flags & PTA_3DNOW_A - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) - ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; - if (processor_alias_table[i].flags & PTA_SSE - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE; - if (processor_alias_table[i].flags & PTA_SSE2 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE2; - if (processor_alias_table[i].flags & PTA_SSE3 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE3; - if (processor_alias_table[i].flags & PTA_SSSE3 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) - ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; - if (processor_alias_table[i].flags & PTA_SSE4_1 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; - if (processor_alias_table[i].flags & PTA_SSE4_2 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; - if (processor_alias_table[i].flags & PTA_AVX - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) - ix86_isa_flags |= OPTION_MASK_ISA_AVX; - if (processor_alias_table[i].flags & PTA_FMA - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) - ix86_isa_flags |= OPTION_MASK_ISA_FMA; - if (processor_alias_table[i].flags & PTA_SSE4A - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; - if (processor_alias_table[i].flags & PTA_SSE5 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5)) - ix86_isa_flags |= OPTION_MASK_ISA_SSE5; - if (processor_alias_table[i].flags & PTA_LWP - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) - ix86_isa_flags |= OPTION_MASK_ISA_LWP; - if (processor_alias_table[i].flags & PTA_ABM - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) - ix86_isa_flags |= OPTION_MASK_ISA_ABM; - if (processor_alias_table[i].flags & PTA_CX16 - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) - ix86_isa_flags |= OPTION_MASK_ISA_CX16; - if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) - ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; - if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)) - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) - ix86_isa_flags |= OPTION_MASK_ISA_SAHF; - if (processor_alias_table[i].flags & PTA_MOVBE - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE)) - ix86_isa_flags |= OPTION_MASK_ISA_MOVBE; - if (processor_alias_table[i].flags & PTA_AES - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) - ix86_isa_flags |= OPTION_MASK_ISA_AES; - if (processor_alias_table[i].flags & PTA_PCLMUL - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) - ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; - if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) - x86_prefetch_sse = true; - - break; - } - - if (i == pta_size) - error ("bad value (%s) for %sarch=%s %s", - ix86_arch_string, prefix, suffix, sw); - - ix86_arch_mask = 1u << ix86_arch; - for (i = 0; i < X86_ARCH_LAST; ++i) - ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); - - for (i = 0; i < pta_size; i++) - if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) - { - ix86_schedule = processor_alias_table[i].schedule; - ix86_tune = processor_alias_table[i].processor; - if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) - { - if (ix86_tune_defaulted) - { - ix86_tune_string = "x86-64"; - for (i = 0; i < pta_size; i++) - if (! strcmp (ix86_tune_string, - processor_alias_table[i].name)) - break; - ix86_schedule = processor_alias_table[i].schedule; - ix86_tune = processor_alias_table[i].processor; - } - else - error ("CPU you selected does not support x86-64 " - "instruction set"); - } - /* Intel CPUs have always interpreted SSE prefetch instructions as - NOPs; so, we can enable SSE prefetch instructions even when - -mtune (rather than -march) points us to a processor that has them. - However, the VIA C3 gives a SIGILL, so we only do that for i686 and - higher processors. */ - if (TARGET_CMOVE - && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) - x86_prefetch_sse = true; - break; - } - if (i == pta_size) - error ("bad value (%s) for %stune=%s %s", - ix86_tune_string, prefix, suffix, sw); - - ix86_tune_mask = 1u << ix86_tune; - for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); - - if (optimize_size) - ix86_cost = &ix86_size_cost; - else - ix86_cost = processor_target_table[ix86_tune].cost; - - /* Arrange to set up i386_stack_locals for all functions. */ - init_machine_status = ix86_init_machine_status; - - /* Validate -mregparm= value. */ - if (ix86_regparm_string) - { - if (TARGET_64BIT) - warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix); - i = atoi (ix86_regparm_string); - if (i < 0 || i > REGPARM_MAX) - error ("%sregparm=%d%s is not between 0 and %d", - prefix, i, suffix, REGPARM_MAX); - else - ix86_regparm = i; - } - if (TARGET_64BIT) - ix86_regparm = REGPARM_MAX; - - /* If the user has provided any of the -malign-* options, - warn and use that value only if -falign-* is not set. - Remove this code in GCC 3.2 or later. */ - if (ix86_align_loops_string) - { - warning (0, "%salign-loops%s is obsolete, use -falign-loops%s", - prefix, suffix, suffix); - if (align_loops == 0) - { - i = atoi (ix86_align_loops_string); - if (i < 0 || i > MAX_CODE_ALIGN) - error ("%salign-loops=%d%s is not between 0 and %d", - prefix, i, suffix, MAX_CODE_ALIGN); - else - align_loops = 1 << i; - } - } - - if (ix86_align_jumps_string) - { - warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s", - prefix, suffix, suffix); - if (align_jumps == 0) - { - i = atoi (ix86_align_jumps_string); - if (i < 0 || i > MAX_CODE_ALIGN) - error ("%salign-loops=%d%s is not between 0 and %d", - prefix, i, suffix, MAX_CODE_ALIGN); - else - align_jumps = 1 << i; - } - } - - if (ix86_align_funcs_string) - { - warning (0, "%salign-functions%s is obsolete, use -falign-functions%s", - prefix, suffix, suffix); - if (align_functions == 0) - { - i = atoi (ix86_align_funcs_string); - if (i < 0 || i > MAX_CODE_ALIGN) - error ("%salign-loops=%d%s is not between 0 and %d", - prefix, i, suffix, MAX_CODE_ALIGN); - else - align_functions = 1 << i; - } - } - - /* Default align_* from the processor table. */ - if (align_loops == 0) - { - align_loops = processor_target_table[ix86_tune].align_loop; - align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; - } - if (align_jumps == 0) - { - align_jumps = processor_target_table[ix86_tune].align_jump; - align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; - } - if (align_functions == 0) - { - align_functions = processor_target_table[ix86_tune].align_func; - } - - /* Validate -mbranch-cost= value, or provide default. */ - ix86_branch_cost = ix86_cost->branch_cost; - if (ix86_branch_cost_string) - { - i = atoi (ix86_branch_cost_string); - if (i < 0 || i > 5) - error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix); - else - ix86_branch_cost = i; - } - if (ix86_section_threshold_string) - { - i = atoi (ix86_section_threshold_string); - if (i < 0) - error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix); - else - ix86_section_threshold = i; - } - - if (ix86_tls_dialect_string) - { - if (strcmp (ix86_tls_dialect_string, "gnu") == 0) - ix86_tls_dialect = TLS_DIALECT_GNU; - else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) - ix86_tls_dialect = TLS_DIALECT_GNU2; - else if (strcmp (ix86_tls_dialect_string, "sun") == 0) - ix86_tls_dialect = TLS_DIALECT_SUN; - else - error ("bad value (%s) for %stls-dialect=%s %s", - ix86_tls_dialect_string, prefix, suffix, sw); - } - - if (ix87_precision_string) - { - i = atoi (ix87_precision_string); - if (i != 32 && i != 64 && i != 80) - error ("pc%d is not valid precision setting (32, 64 or 80)", i); - } - - if (TARGET_64BIT) - { - target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit; - - /* Enable by default the SSE and MMX builtins. Do allow the user to - explicitly disable any of these. In particular, disabling SSE and - MMX for kernel code is extremely useful. */ - if (!ix86_arch_specified) - ix86_isa_flags - |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX - | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit); - - if (TARGET_RTD) - warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix); - } - else - { - target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit; - - if (!ix86_arch_specified) - ix86_isa_flags - |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit; - - /* i386 ABI does not specify red zone. It still makes sense to use it - when programmer takes care to stack from being destroyed. */ - if (!(target_flags_explicit & MASK_NO_RED_ZONE)) - target_flags |= MASK_NO_RED_ZONE; - } - - /* Keep nonleaf frame pointers. */ - if (flag_omit_frame_pointer) - target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; - else if (TARGET_OMIT_LEAF_FRAME_POINTER) - flag_omit_frame_pointer = 1; - - /* If we're doing fast math, we don't care about comparison order - wrt NaNs. This lets us use a shorter comparison sequence. */ - if (flag_finite_math_only) - target_flags &= ~MASK_IEEE_FP; - - /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, - since the insns won't need emulation. */ - if (x86_arch_always_fancy_math_387 & ix86_arch_mask) - target_flags &= ~MASK_NO_FANCY_MATH_387; - - /* Likewise, if the target doesn't have a 387, or we've specified - software floating point, don't use 387 inline intrinsics. */ - if (!TARGET_80387) - target_flags |= MASK_NO_FANCY_MATH_387; - - /* Turn on MMX builtins for -msse. */ - if (TARGET_SSE) - { - ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit; - x86_prefetch_sse = true; - } - - /* Turn on popcnt instruction for -msse4.2 or -mabm. */ - if (TARGET_SSE4_2 || TARGET_ABM) - ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; - - /* Validate -mpreferred-stack-boundary= value or default it to - PREFERRED_STACK_BOUNDARY_DEFAULT. */ - ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; - if (ix86_preferred_stack_boundary_string) - { - i = atoi (ix86_preferred_stack_boundary_string); - if (i < (TARGET_64BIT ? 4 : 2) || i > 12) - error ("%spreferred-stack-boundary=%d%s is not between %d and 12", - prefix, i, suffix, TARGET_64BIT ? 4 : 2); - else - ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; - } - - /* Set the default value for -mstackrealign. */ - if (ix86_force_align_arg_pointer == -1) - ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; - - ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; - - /* Validate -mincoming-stack-boundary= value or default it to - MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ - ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; - if (ix86_incoming_stack_boundary_string) - { - i = atoi (ix86_incoming_stack_boundary_string); - if (i < (TARGET_64BIT ? 4 : 2) || i > 12) - error ("-mincoming-stack-boundary=%d is not between %d and 12", - i, TARGET_64BIT ? 4 : 2); - else - { - ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT; - ix86_incoming_stack_boundary - = ix86_user_incoming_stack_boundary; - } - } - - /* Accept -msseregparm only if at least SSE support is enabled. */ - if (TARGET_SSEREGPARM - && ! TARGET_SSE) - error ("%ssseregparm%s used without SSE enabled", prefix, suffix); - - ix86_fpmath = TARGET_FPMATH_DEFAULT; - if (ix86_fpmath_string != 0) - { - if (! strcmp (ix86_fpmath_string, "387")) - ix86_fpmath = FPMATH_387; - else if (! strcmp (ix86_fpmath_string, "sse")) - { - if (!TARGET_SSE) - { - warning (0, "SSE instruction set disabled, using 387 arithmetics"); - ix86_fpmath = FPMATH_387; - } - else - ix86_fpmath = FPMATH_SSE; - } - else if (! strcmp (ix86_fpmath_string, "387,sse") - || ! strcmp (ix86_fpmath_string, "387+sse") - || ! strcmp (ix86_fpmath_string, "sse,387") - || ! strcmp (ix86_fpmath_string, "sse+387") - || ! strcmp (ix86_fpmath_string, "both")) - { - if (!TARGET_SSE) - { - warning (0, "SSE instruction set disabled, using 387 arithmetics"); - ix86_fpmath = FPMATH_387; - } - else if (!TARGET_80387) - { - warning (0, "387 instruction set disabled, using SSE arithmetics"); - ix86_fpmath = FPMATH_SSE; - } - else - ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387); - } - else - error ("bad value (%s) for %sfpmath=%s %s", - ix86_fpmath_string, prefix, suffix, sw); - } - - /* If the i387 is disabled, then do not return values in it. */ - if (!TARGET_80387) - target_flags &= ~MASK_FLOAT_RETURNS; - - /* Use external vectorized library in vectorizing intrinsics. */ - if (ix86_veclibabi_string) - { - if (strcmp (ix86_veclibabi_string, "svml") == 0) - ix86_veclib_handler = ix86_veclibabi_svml; - else if (strcmp (ix86_veclibabi_string, "acml") == 0) - ix86_veclib_handler = ix86_veclibabi_acml; - else - error ("unknown vectorization library ABI type (%s) for " - "%sveclibabi=%s %s", ix86_veclibabi_string, - prefix, suffix, sw); - } - - if ((x86_accumulate_outgoing_args & ix86_tune_mask) - && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) - && !optimize_size) - target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; - - /* ??? Unwind info is not correct around the CFG unless either a frame - pointer is present or M_A_O_A is set. Fixing this requires rewriting - unwind info generation to be aware of the CFG and propagating states - around edges. */ - if ((flag_unwind_tables || flag_asynchronous_unwind_tables - || flag_exceptions || flag_non_call_exceptions) - && flag_omit_frame_pointer - && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) - { - if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) - warning (0, "unwind tables currently require either a frame pointer " - "or %saccumulate-outgoing-args%s for correctness", - prefix, suffix); - target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; - } - - /* If stack probes are required, the space used for large function - arguments on the stack must also be probed, so enable - -maccumulate-outgoing-args so this happens in the prologue. */ - if (TARGET_STACK_PROBE - && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) - { - if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) - warning (0, "stack probing requires %saccumulate-outgoing-args%s " - "for correctness", prefix, suffix); - target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; - } - - /* For sane SSE instruction set generation we need fcomi instruction. - It is safe to enable all CMOVE instructions. */ - if (TARGET_SSE) - TARGET_CMOVE = 1; - - /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ - { - char *p; - ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); - p = strchr (internal_label_prefix, 'X'); - internal_label_prefix_len = p - internal_label_prefix; - *p = '\0'; - } - - /* When scheduling description is not available, disable scheduler pass - so it won't slow down the compilation and make x87 code slower. */ - if (!TARGET_SCHEDULE) - flag_schedule_insns_after_reload = flag_schedule_insns = 0; - - if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES)) - set_param_value ("simultaneous-prefetches", - ix86_cost->simultaneous_prefetches); - if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) - set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block); - if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE)) - set_param_value ("l1-cache-size", ix86_cost->l1_cache_size); - if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE)) - set_param_value ("l2-cache-size", ix86_cost->l2_cache_size); - - /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) - can be optimized to ap = __builtin_next_arg (0). */ - if (!TARGET_64BIT) - targetm.expand_builtin_va_start = NULL; - - if (TARGET_64BIT) - { - ix86_gen_leave = gen_leave_rex64; - ix86_gen_pop1 = gen_popdi1; - ix86_gen_add3 = gen_adddi3; - ix86_gen_sub3 = gen_subdi3; - ix86_gen_sub3_carry = gen_subdi3_carry_rex64; - ix86_gen_one_cmpl2 = gen_one_cmpldi2; - ix86_gen_monitor = gen_sse3_monitor64; - ix86_gen_andsp = gen_anddi3; - } - else - { - ix86_gen_leave = gen_leave; - ix86_gen_pop1 = gen_popsi1; - ix86_gen_add3 = gen_addsi3; - ix86_gen_sub3 = gen_subsi3; - ix86_gen_sub3_carry = gen_subsi3_carry; - ix86_gen_one_cmpl2 = gen_one_cmplsi2; - ix86_gen_monitor = gen_sse3_monitor; - ix86_gen_andsp = gen_andsi3; - } - -#ifdef USE_IX86_CLD - /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ - if (!TARGET_64BIT) - target_flags |= MASK_CLD & ~target_flags_explicit; -#endif - - /* Save the initial options in case the user does function specific options */ - if (main_args_p) - target_option_default_node = target_option_current_node - = build_target_option_node (); - - /* Change -fstack-check to mean -mstack-arg-probe on x86. */ - if (flag_stack_check) - { - flag_stack_check = 0; - target_flags |= MASK_STACK_PROBE; - } - - /* Handle stack protector */ - if (ix86_stack_protector_guard_string != 0) - { - if (!strcmp (ix86_stack_protector_guard_string, "tls")) - ix86_stack_protector_guard = SSP_TLS; - else if (!strcmp (ix86_stack_protector_guard_string, "global")) - ix86_stack_protector_guard = SSP_GLOBAL; - else - error ("bad value (%s) for %sstack-protector-guard=%s %s", - ix86_stack_protector_guard, prefix, suffix, sw); - } - else - { - ix86_stack_protector_guard = TARGET_HAS_BIONIC? SSP_GLOBAL : SSP_TLS; - } -} - -/* Update register usage after having seen the compiler flags. */ - -void -ix86_conditional_register_usage (void) -{ - int i; - unsigned int j; - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - { - if (fixed_regs[i] > 1) - fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2)); - if (call_used_regs[i] > 1) - call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2)); - } - - /* The PIC register, if it exists, is fixed. */ - j = PIC_OFFSET_TABLE_REGNUM; - if (j != INVALID_REGNUM) - fixed_regs[j] = call_used_regs[j] = 1; - - /* The MS_ABI changes the set of call-used registers. */ - if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) - { - call_used_regs[SI_REG] = 0; - call_used_regs[DI_REG] = 0; - call_used_regs[XMM6_REG] = 0; - call_used_regs[XMM7_REG] = 0; - for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) - call_used_regs[i] = 0; - } - - /* The default setting of CLOBBERED_REGS is for 32-bit; add in the - other call-clobbered regs for 64-bit. */ - if (TARGET_64BIT) - { - CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) - && call_used_regs[i]) - SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); - } - - /* If MMX is disabled, squash the registers. */ - if (! TARGET_MMX) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) - fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; - - /* If SSE is disabled, squash the registers. */ - if (! TARGET_SSE) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) - fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; - - /* If the FPU is disabled, squash the registers. */ - if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) - fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; - - /* If 32-bit, squash the 64-bit registers. */ - if (! TARGET_64BIT) - { - for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) - reg_names[i] = ""; - for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) - reg_names[i] = ""; - } -} - - -/* Save the current options */ - -static void -ix86_function_specific_save (struct cl_target_option *ptr) -{ - gcc_assert (IN_RANGE (ix86_arch, 0, 255)); - gcc_assert (IN_RANGE (ix86_schedule, 0, 255)); - gcc_assert (IN_RANGE (ix86_tune, 0, 255)); - gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); - gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); - - ptr->arch = ix86_arch; - ptr->schedule = ix86_schedule; - ptr->tune = ix86_tune; - ptr->fpmath = ix86_fpmath; - ptr->branch_cost = ix86_branch_cost; - ptr->tune_defaulted = ix86_tune_defaulted; - ptr->arch_specified = ix86_arch_specified; - ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; - ptr->target_flags_explicit = target_flags_explicit; -} - -/* Restore the current options */ - -static void -ix86_function_specific_restore (struct cl_target_option *ptr) -{ - enum processor_type old_tune = ix86_tune; - enum processor_type old_arch = ix86_arch; - unsigned int ix86_arch_mask, ix86_tune_mask; - int i; - - ix86_arch = ptr->arch; - ix86_schedule = ptr->schedule; - ix86_tune = ptr->tune; - ix86_fpmath = ptr->fpmath; - ix86_branch_cost = ptr->branch_cost; - ix86_tune_defaulted = ptr->tune_defaulted; - ix86_arch_specified = ptr->arch_specified; - ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit; - target_flags_explicit = ptr->target_flags_explicit; - - /* Recreate the arch feature tests if the arch changed */ - if (old_arch != ix86_arch) - { - ix86_arch_mask = 1u << ix86_arch; - for (i = 0; i < X86_ARCH_LAST; ++i) - ix86_arch_features[i] - = !!(initial_ix86_arch_features[i] & ix86_arch_mask); - } - - /* Recreate the tune optimization tests */ - if (old_tune != ix86_tune) - { - ix86_tune_mask = 1u << ix86_tune; - for (i = 0; i < X86_TUNE_LAST; ++i) - ix86_tune_features[i] - = !!(initial_ix86_tune_features[i] & ix86_tune_mask); - } -} - -/* Print the current options */ - -static void -ix86_function_specific_print (FILE *file, int indent, - struct cl_target_option *ptr) -{ - char *target_string - = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags, - NULL, NULL, NULL, false); - - fprintf (file, "%*sarch = %d (%s)\n", - indent, "", - ptr->arch, - ((ptr->arch < TARGET_CPU_DEFAULT_max) - ? cpu_names[ptr->arch] - : "<unknown>")); - - fprintf (file, "%*stune = %d (%s)\n", - indent, "", - ptr->tune, - ((ptr->tune < TARGET_CPU_DEFAULT_max) - ? cpu_names[ptr->tune] - : "<unknown>")); - - fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath, - (ptr->fpmath & FPMATH_387) ? ", 387" : "", - (ptr->fpmath & FPMATH_SSE) ? ", sse" : ""); - fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); - - if (target_string) - { - fprintf (file, "%*s%s\n", indent, "", target_string); - free (target_string); - } -} - - -/* Inner function to process the attribute((target(...))), take an argument and - set the current options from the argument. If we have a list, recursively go - over the list. */ - -static bool -ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) -{ - char *next_optstr; - bool ret = true; - -#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } -#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } -#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } -#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } - - enum ix86_opt_type - { - ix86_opt_unknown, - ix86_opt_yes, - ix86_opt_no, - ix86_opt_str, - ix86_opt_isa - }; - - static const struct - { - const char *string; - size_t len; - enum ix86_opt_type type; - int opt; - int mask; - } attrs[] = { - /* isa options */ - IX86_ATTR_ISA ("3dnow", OPT_m3dnow), - IX86_ATTR_ISA ("abm", OPT_mabm), - IX86_ATTR_ISA ("aes", OPT_maes), - IX86_ATTR_ISA ("avx", OPT_mavx), - IX86_ATTR_ISA ("mmx", OPT_mmmx), - IX86_ATTR_ISA ("pclmul", OPT_mpclmul), - IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), - IX86_ATTR_ISA ("sse", OPT_msse), - IX86_ATTR_ISA ("sse2", OPT_msse2), - IX86_ATTR_ISA ("sse3", OPT_msse3), - IX86_ATTR_ISA ("sse4", OPT_msse4), - IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), - IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), - IX86_ATTR_ISA ("sse4a", OPT_msse4a), - IX86_ATTR_ISA ("sse5", OPT_msse5), - IX86_ATTR_ISA ("ssse3", OPT_mssse3), - IX86_ATTR_ISA ("lwp", OPT_mlwp), - - /* string options */ - IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), - IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH), - IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), - - /* flag options */ - IX86_ATTR_YES ("cld", - OPT_mcld, - MASK_CLD), - - IX86_ATTR_NO ("fancy-math-387", - OPT_mfancy_math_387, - MASK_NO_FANCY_MATH_387), - - IX86_ATTR_NO ("fused-madd", - OPT_mfused_madd, - MASK_NO_FUSED_MADD), - - IX86_ATTR_YES ("ieee-fp", - OPT_mieee_fp, - MASK_IEEE_FP), - - IX86_ATTR_YES ("inline-compares", - OPT_minline_compares, - MASK_INLINE_COMPARES), - - IX86_ATTR_YES ("inline-all-stringops", - OPT_minline_all_stringops, - MASK_INLINE_ALL_STRINGOPS), - - IX86_ATTR_YES ("inline-stringops-dynamically", - OPT_minline_stringops_dynamically, - MASK_INLINE_STRINGOPS_DYNAMICALLY), - - IX86_ATTR_NO ("align-stringops", - OPT_mno_align_stringops, - MASK_NO_ALIGN_STRINGOPS), - - IX86_ATTR_YES ("recip", - OPT_mrecip, - MASK_RECIP), - - }; - - /* If this is a list, recurse to get the options. */ - if (TREE_CODE (args) == TREE_LIST) - { - bool ret = true; - - for (; args; args = TREE_CHAIN (args)) - if (TREE_VALUE (args) - && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings)) - ret = false; - - return ret; - } - - else if (TREE_CODE (args) != STRING_CST) - gcc_unreachable (); - - /* Handle multiple arguments separated by commas. */ - next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); - - while (next_optstr && *next_optstr != '\0') - { - char *p = next_optstr; - char *orig_p = p; - char *comma = strchr (next_optstr, ','); - const char *opt_string; - size_t len, opt_len; - int opt; - bool opt_set_p; - char ch; - unsigned i; - enum ix86_opt_type type = ix86_opt_unknown; - int mask = 0; - - if (comma) - { - *comma = '\0'; - len = comma - next_optstr; - next_optstr = comma + 1; - } - else - { - len = strlen (p); - next_optstr = NULL; - } - - /* Recognize no-xxx. */ - if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') - { - opt_set_p = false; - p += 3; - len -= 3; - } - else - opt_set_p = true; - - /* Find the option. */ - ch = *p; - opt = N_OPTS; - for (i = 0; i < ARRAY_SIZE (attrs); i++) - { - type = attrs[i].type; - opt_len = attrs[i].len; - if (ch == attrs[i].string[0] - && ((type != ix86_opt_str) ? len == opt_len : len > opt_len) - && memcmp (p, attrs[i].string, opt_len) == 0) - { - opt = attrs[i].opt; - mask = attrs[i].mask; - opt_string = attrs[i].string; - break; - } - } - - /* Process the option. */ - if (opt == N_OPTS) - { - error ("attribute(target(\"%s\")) is unknown", orig_p); - ret = false; - } - - else if (type == ix86_opt_isa) - ix86_handle_option (opt, p, opt_set_p); - - else if (type == ix86_opt_yes || type == ix86_opt_no) - { - if (type == ix86_opt_no) - opt_set_p = !opt_set_p; - - if (opt_set_p) - target_flags |= mask; - else - target_flags &= ~mask; - } - - else if (type == ix86_opt_str) - { - if (p_strings[opt]) - { - error ("option(\"%s\") was already specified", opt_string); - ret = false; - } - else - p_strings[opt] = xstrdup (p + opt_len); - } - - else - gcc_unreachable (); - } - - return ret; -} - -/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ - -tree -ix86_valid_target_attribute_tree (tree args) -{ - const char *orig_arch_string = ix86_arch_string; - const char *orig_tune_string = ix86_tune_string; - const char *orig_fpmath_string = ix86_fpmath_string; - int orig_tune_defaulted = ix86_tune_defaulted; - int orig_arch_specified = ix86_arch_specified; - char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL }; - tree t = NULL_TREE; - int i; - struct cl_target_option *def - = TREE_TARGET_OPTION (target_option_default_node); - - /* Process each of the options on the chain. */ - if (! ix86_valid_target_attribute_inner_p (args, option_strings)) - return NULL_TREE; - - /* If the changed options are different from the default, rerun override_options, - and then save the options away. The string options are are attribute options, - and will be undone when we copy the save structure. */ - if (ix86_isa_flags != def->ix86_isa_flags - || target_flags != def->target_flags - || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] - || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] - || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) - { - /* If we are using the default tune= or arch=, undo the string assigned, - and use the default. */ - if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) - ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH]; - else if (!orig_arch_specified) - ix86_arch_string = NULL; - - if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) - ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE]; - else if (orig_tune_defaulted) - ix86_tune_string = NULL; - - /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ - if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) - ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]; - else if (!TARGET_64BIT && TARGET_SSE) - ix86_fpmath_string = "sse,387"; - - /* Do any overrides, such as arch=xxx, or tune=xxx support. */ - override_options (false); - - /* Add any builtin functions with the new isa if any. */ - ix86_add_new_builtins (ix86_isa_flags); - - /* Save the current options unless we are validating options for - #pragma. */ - t = build_target_option_node (); - - ix86_arch_string = orig_arch_string; - ix86_tune_string = orig_tune_string; - ix86_fpmath_string = orig_fpmath_string; - - /* Free up memory allocated to hold the strings */ - for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) - if (option_strings[i]) - free (option_strings[i]); - } - - return t; -} - -/* Hook to validate attribute((target("string"))). */ - -static bool -ix86_valid_target_attribute_p (tree fndecl, - tree ARG_UNUSED (name), - tree args, - int ARG_UNUSED (flags)) -{ - struct cl_target_option cur_target; - bool ret = true; - tree old_optimize = build_optimization_node (); - tree new_target, new_optimize; - tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); - - /* If the function changed the optimization levels as well as setting target - options, start with the optimizations specified. */ - if (func_optimize && func_optimize != old_optimize) - cl_optimization_restore (TREE_OPTIMIZATION (func_optimize)); - - /* The target attributes may also change some optimization flags, so update - the optimization options if necessary. */ - cl_target_option_save (&cur_target); - new_target = ix86_valid_target_attribute_tree (args); - new_optimize = build_optimization_node (); - - if (!new_target) - ret = false; - - else if (fndecl) - { - DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; - - if (old_optimize != new_optimize) - DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; - } - - cl_target_option_restore (&cur_target); - - if (old_optimize != new_optimize) - cl_optimization_restore (TREE_OPTIMIZATION (old_optimize)); - - return ret; -} - - -/* Hook to determine if one function can safely inline another. */ - -static bool -ix86_can_inline_p (tree caller, tree callee) -{ - bool ret = false; - tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); - tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); - - /* If callee has no option attributes, then it is ok to inline. */ - if (!callee_tree) - ret = true; - - /* If caller has no option attributes, but callee does then it is not ok to - inline. */ - else if (!caller_tree) - ret = false; - - else - { - struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); - struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); - - /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function - can inline a SSE2 function but a SSE2 function can't inline a SSE5 - function. */ - if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags) - != callee_opts->ix86_isa_flags) - ret = false; - - /* See if we have the same non-isa options. */ - else if (caller_opts->target_flags != callee_opts->target_flags) - ret = false; - - /* See if arch, tune, etc. are the same. */ - else if (caller_opts->arch != callee_opts->arch) - ret = false; - - else if (caller_opts->tune != callee_opts->tune) - ret = false; - - else if (caller_opts->fpmath != callee_opts->fpmath) - ret = false; - - else if (caller_opts->branch_cost != callee_opts->branch_cost) - ret = false; - - else - ret = true; - } - - return ret; -} - - -/* Remember the last target of ix86_set_current_function. */ -static GTY(()) tree ix86_previous_fndecl; - -/* Establish appropriate back-end context for processing the function - FNDECL. The argument might be NULL to indicate processing at top - level, outside of any function scope. */ -static void -ix86_set_current_function (tree fndecl) -{ - /* Only change the context if the function changes. This hook is called - several times in the course of compiling a function, and we don't want to - slow things down too much or call target_reinit when it isn't safe. */ - if (fndecl && fndecl != ix86_previous_fndecl) - { - tree old_tree = (ix86_previous_fndecl - ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) - : NULL_TREE); - - tree new_tree = (fndecl - ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) - : NULL_TREE); - - ix86_previous_fndecl = fndecl; - if (old_tree == new_tree) - ; - - else if (new_tree) - { - cl_target_option_restore (TREE_TARGET_OPTION (new_tree)); - target_reinit (); - } - - else if (old_tree) - { - struct cl_target_option *def - = TREE_TARGET_OPTION (target_option_current_node); - - cl_target_option_restore (def); - target_reinit (); - } - } -} - - -/* Return true if this goes in large data/bss. */ - -static bool -ix86_in_large_data_p (tree exp) -{ - if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) - return false; - - /* Functions are never large data. */ - if (TREE_CODE (exp) == FUNCTION_DECL) - return false; - - if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) - { - const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); - if (strcmp (section, ".ldata") == 0 - || strcmp (section, ".lbss") == 0) - return true; - return false; - } - else - { - HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); - - /* If this is an incomplete type with size 0, then we can't put it - in data because it might be too big when completed. */ - if (!size || size > ix86_section_threshold) - return true; - } - - return false; -} - -/* Switch to the appropriate section for output of DECL. - DECL is either a `VAR_DECL' node or a constant of some sort. - RELOC indicates whether forming the initial value of DECL requires - link-time relocations. */ - -static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT) - ATTRIBUTE_UNUSED; - -static section * -x86_64_elf_select_section (tree decl, int reloc, - unsigned HOST_WIDE_INT align) -{ - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) - { - const char *sname = NULL; - unsigned int flags = SECTION_WRITE; - switch (categorize_decl_for_section (decl, reloc)) - { - case SECCAT_DATA: - sname = ".ldata"; - break; - case SECCAT_DATA_REL: - sname = ".ldata.rel"; - break; - case SECCAT_DATA_REL_LOCAL: - sname = ".ldata.rel.local"; - break; - case SECCAT_DATA_REL_RO: - sname = ".ldata.rel.ro"; - break; - case SECCAT_DATA_REL_RO_LOCAL: - sname = ".ldata.rel.ro.local"; - break; - case SECCAT_BSS: - sname = ".lbss"; - flags |= SECTION_BSS; - break; - case SECCAT_RODATA: - case SECCAT_RODATA_MERGE_STR: - case SECCAT_RODATA_MERGE_STR_INIT: - case SECCAT_RODATA_MERGE_CONST: - sname = ".lrodata"; - flags = 0; - break; - case SECCAT_SRODATA: - case SECCAT_SDATA: - case SECCAT_SBSS: - gcc_unreachable (); - case SECCAT_TEXT: - case SECCAT_TDATA: - case SECCAT_TBSS: - /* We don't split these for medium model. Place them into - default sections and hope for best. */ - break; - case SECCAT_EMUTLS_VAR: - case SECCAT_EMUTLS_TMPL: - gcc_unreachable (); - } - if (sname) - { - /* We might get called with string constants, but get_named_section - doesn't like them as they are not DECLs. Also, we need to set - flags in that case. */ - if (!DECL_P (decl)) - return get_section (sname, flags, NULL); - return get_named_section (decl, sname, reloc); - } - } - return default_elf_select_section (decl, reloc, align); -} - -/* Build up a unique section name, expressed as a - STRING_CST node, and assign it to DECL_SECTION_NAME (decl). - RELOC indicates whether the initial value of EXP requires - link-time relocations. */ - -static void ATTRIBUTE_UNUSED -x86_64_elf_unique_section (tree decl, int reloc) -{ - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) - { - const char *prefix = NULL; - /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ - bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; - - switch (categorize_decl_for_section (decl, reloc)) - { - case SECCAT_DATA: - case SECCAT_DATA_REL: - case SECCAT_DATA_REL_LOCAL: - case SECCAT_DATA_REL_RO: - case SECCAT_DATA_REL_RO_LOCAL: - prefix = one_only ? ".ld" : ".ldata"; - break; - case SECCAT_BSS: - prefix = one_only ? ".lb" : ".lbss"; - break; - case SECCAT_RODATA: - case SECCAT_RODATA_MERGE_STR: - case SECCAT_RODATA_MERGE_STR_INIT: - case SECCAT_RODATA_MERGE_CONST: - prefix = one_only ? ".lr" : ".lrodata"; - break; - case SECCAT_SRODATA: - case SECCAT_SDATA: - case SECCAT_SBSS: - gcc_unreachable (); - case SECCAT_TEXT: - case SECCAT_TDATA: - case SECCAT_TBSS: - /* We don't split these for medium model. Place them into - default sections and hope for best. */ - break; - case SECCAT_EMUTLS_VAR: - prefix = targetm.emutls.var_section; - break; - case SECCAT_EMUTLS_TMPL: - prefix = targetm.emutls.tmpl_section; - break; - } - if (prefix) - { - const char *name, *linkonce; - char *string; - - name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); - name = targetm.strip_name_encoding (name); - - /* If we're using one_only, then there needs to be a .gnu.linkonce - prefix to the section name. */ - linkonce = one_only ? ".gnu.linkonce" : ""; - - string = ACONCAT ((linkonce, prefix, ".", name, NULL)); - - DECL_SECTION_NAME (decl) = build_string (strlen (string), string); - return; - } - } - default_unique_section (decl, reloc); -} - -#ifdef COMMON_ASM_OP -/* This says how to output assembler code to declare an - uninitialized external linkage data object. - - For medium model x86-64 we need to use .largecomm opcode for - large objects. */ -void -x86_elf_aligned_common (FILE *file, - const char *name, unsigned HOST_WIDE_INT size, - int align) -{ - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && size > (unsigned int)ix86_section_threshold) - fprintf (file, ".largecomm\t"); - else - fprintf (file, "%s", COMMON_ASM_OP); - assemble_name (file, name); - fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", - size, align / BITS_PER_UNIT); -} -#endif - -/* Utility function for targets to use in implementing - ASM_OUTPUT_ALIGNED_BSS. */ - -void -x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, - const char *name, unsigned HOST_WIDE_INT size, - int align) -{ - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && size > (unsigned int)ix86_section_threshold) - switch_to_section (get_named_section (decl, ".lbss", 0)); - else - switch_to_section (bss_section); - ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); -#ifdef ASM_DECLARE_OBJECT_NAME - last_assemble_variable_decl = decl; - ASM_DECLARE_OBJECT_NAME (file, name, decl); -#else - /* Standard thing is just output label for the object. */ - ASM_OUTPUT_LABEL (file, name); -#endif /* ASM_DECLARE_OBJECT_NAME */ - ASM_OUTPUT_SKIP (file, size ? size : 1); -} - -void -optimization_options (int level, int size ATTRIBUTE_UNUSED) -{ - /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to - make the problem with not enough registers even worse. */ -#ifdef INSN_SCHEDULING - if (level > 1) - flag_schedule_insns = 0; -#endif - - /* For -O2 and beyond, turn on -fzee for x86_64 target. */ - if (level > 1 && TARGET_64BIT) - flag_zee = 1; - - if (TARGET_MACHO) - /* The Darwin libraries never set errno, so we might as well - avoid calling them when that's the only reason we would. */ - flag_errno_math = 0; - - /* The default values of these switches depend on the TARGET_64BIT - that is not known at this moment. Mark these values with 2 and - let user the to override these. In case there is no command line option - specifying them, we will set the defaults in override_options. */ - if (optimize >= 1) - flag_omit_frame_pointer = 2; - flag_pcc_struct_return = 2; - flag_asynchronous_unwind_tables = 2; - flag_vect_cost_model = 1; -#ifdef SUBTARGET_OPTIMIZATION_OPTIONS - SUBTARGET_OPTIMIZATION_OPTIONS; -#endif -} - -/* Decide whether we can make a sibling call to a function. DECL is the - declaration of the function being targeted by the call and EXP is the - CALL_EXPR representing the call. */ - -static bool -ix86_function_ok_for_sibcall (tree decl, tree exp) -{ - tree type, decl_or_type; - rtx a, b; - - /* If we are generating position-independent code, we cannot sibcall - optimize any indirect call, or a direct call to a global function, - as the PLT requires %ebx be live. */ - if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl))) - return false; - - /* If we need to align the outgoing stack, then sibcalling would - unalign the stack, which may break the called function. */ - if (ix86_minimum_incoming_stack_boundary (true) - < PREFERRED_STACK_BOUNDARY) - return false; - - if (decl) - { - decl_or_type = decl; - type = TREE_TYPE (decl); - } - else - { - /* We're looking at the CALL_EXPR, we need the type of the function. */ - type = CALL_EXPR_FN (exp); /* pointer expression */ - type = TREE_TYPE (type); /* pointer type */ - type = TREE_TYPE (type); /* function type */ - decl_or_type = type; - } - - /* Check that the return value locations are the same. Like - if we are returning floats on the 80387 register stack, we cannot - make a sibcall from a function that doesn't return a float to a - function that does or, conversely, from a function that does return - a float to a function that doesn't; the necessary stack adjustment - would not be executed. This is also the place we notice - differences in the return value ABI. Note that it is ok for one - of the functions to have void return type as long as the return - value of the other is passed in a register. */ - a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); - b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), - cfun->decl, false); - if (STACK_REG_P (a) || STACK_REG_P (b)) - { - if (!rtx_equal_p (a, b)) - return false; - } - else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) - ; - else if (!rtx_equal_p (a, b)) - return false; - - if (TARGET_64BIT) - { - /* The SYSV ABI has more call-clobbered registers; - disallow sibcalls from MS to SYSV. */ - if (cfun->machine->call_abi == MS_ABI - && ix86_function_type_abi (type) == SYSV_ABI) - return false; - } - else - { - /* If this call is indirect, we'll need to be able to use a - call-clobbered register for the address of the target function. - Make sure that all such registers are not used for passing - parameters. Note that DLLIMPORT functions are indirect. */ - if (!decl - || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))) - { - if (ix86_function_regparm (type, NULL) >= 3) - { - /* ??? Need to count the actual number of registers to be used, - not the possible number of registers. Fix later. */ - return false; - } - } - } - - /* Otherwise okay. That also includes certain types of indirect calls. */ - return true; -} - -/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" - calling convention attributes; - arguments as in struct attribute_spec.handler. */ - -static tree -ix86_handle_cconv_attribute (tree *node, tree name, - tree args, - int flags ATTRIBUTE_UNUSED, - bool *no_add_attrs) -{ - if (TREE_CODE (*node) != FUNCTION_TYPE - && TREE_CODE (*node) != METHOD_TYPE - && TREE_CODE (*node) != FIELD_DECL - && TREE_CODE (*node) != TYPE_DECL) - { - warning (OPT_Wattributes, "%qs attribute only applies to functions", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - return NULL_TREE; - } - - /* Can combine regparm with all attributes but fastcall. */ - if (is_attribute_p ("regparm", name)) - { - tree cst; - - if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) - { - error ("fastcall and regparm attributes are not compatible"); - } - - cst = TREE_VALUE (args); - if (TREE_CODE (cst) != INTEGER_CST) - { - warning (OPT_Wattributes, - "%qs attribute requires an integer constant argument", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - } - else if (compare_tree_int (cst, REGPARM_MAX) > 0) - { - warning (OPT_Wattributes, "argument to %qs attribute larger than %d", - IDENTIFIER_POINTER (name), REGPARM_MAX); - *no_add_attrs = true; - } - - return NULL_TREE; - } - - if (TARGET_64BIT) - { - /* Do not warn when emulating the MS ABI. */ - if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI) - warning (OPT_Wattributes, "%qs attribute ignored", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - return NULL_TREE; - } - - /* Can combine fastcall with stdcall (redundant) and sseregparm. */ - if (is_attribute_p ("fastcall", name)) - { - if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) - { - error ("fastcall and cdecl attributes are not compatible"); - } - if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) - { - error ("fastcall and stdcall attributes are not compatible"); - } - if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) - { - error ("fastcall and regparm attributes are not compatible"); - } - } - - /* Can combine stdcall with fastcall (redundant), regparm and - sseregparm. */ - else if (is_attribute_p ("stdcall", name)) - { - if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) - { - error ("stdcall and cdecl attributes are not compatible"); - } - if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) - { - error ("stdcall and fastcall attributes are not compatible"); - } - } - - /* Can combine cdecl with regparm and sseregparm. */ - else if (is_attribute_p ("cdecl", name)) - { - if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) - { - error ("stdcall and cdecl attributes are not compatible"); - } - if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) - { - error ("fastcall and cdecl attributes are not compatible"); - } - } - - /* Can combine sseregparm with all attributes. */ - - return NULL_TREE; -} - -/* Return 0 if the attributes for two types are incompatible, 1 if they - are compatible, and 2 if they are nearly compatible (which causes a - warning to be generated). */ - -static int -ix86_comp_type_attributes (const_tree type1, const_tree type2) -{ - /* Check for mismatch of non-default calling convention. */ - const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; - - if (TREE_CODE (type1) != FUNCTION_TYPE - && TREE_CODE (type1) != METHOD_TYPE) - return 1; - - /* Check for mismatched fastcall/regparm types. */ - if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1)) - != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2))) - || (ix86_function_regparm (type1, NULL) - != ix86_function_regparm (type2, NULL))) - return 0; - - /* Check for mismatched sseregparm types. */ - if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) - != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) - return 0; - - /* Check for mismatched return types (cdecl vs stdcall). */ - if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) - != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) - return 0; - - return 1; -} - -/* Return the regparm value for a function with the indicated TYPE and DECL. - DECL may be NULL when calling function indirectly - or considering a libcall. */ - -static int -ix86_function_regparm (const_tree type, const_tree decl) -{ - tree attr; - int regparm; - - static bool error_issued; - - if (TARGET_64BIT) - return (ix86_function_type_abi (type) == SYSV_ABI - ? X86_64_REGPARM_MAX : X64_REGPARM_MAX); - - regparm = ix86_regparm; - attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); - if (attr) - { - regparm - = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); - - if (decl && TREE_CODE (decl) == FUNCTION_DECL) - { - /* We can't use regparm(3) for nested functions because - these pass static chain pointer in %ecx register. */ - if (!error_issued && regparm == 3 - && decl_function_context (decl) - && !DECL_NO_STATIC_CHAIN (decl)) - { - error ("nested functions are limited to 2 register parameters"); - error_issued = true; - return 0; - } - } - - return regparm; - } - - if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) - return 2; - - /* Use register calling convention for local functions when possible. */ - if (decl - && TREE_CODE (decl) == FUNCTION_DECL - && optimize - && !profile_flag) - { - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ - struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); - if (i && i->local) - { - int local_regparm, globals = 0, regno; - struct function *f; - - /* Make sure no regparm register is taken by a - fixed register variable. */ - for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++) - if (fixed_regs[local_regparm]) - break; - - /* We can't use regparm(3) for nested functions as these use - static chain pointer in third argument. */ - if (local_regparm == 3 - && decl_function_context (decl) - && !DECL_NO_STATIC_CHAIN (decl)) - local_regparm = 2; - - /* If the function realigns its stackpointer, the prologue will - clobber %ecx. If we've already generated code for the callee, - the callee DECL_STRUCT_FUNCTION is gone, so we fall back to - scanning the attributes for the self-realigning property. */ - f = DECL_STRUCT_FUNCTION (decl); - /* Since current internal arg pointer won't conflict with - parameter passing regs, so no need to change stack - realignment and adjust regparm number. - - Each fixed register usage increases register pressure, - so less registers should be used for argument passing. - This functionality can be overriden by an explicit - regparm value. */ - for (regno = 0; regno <= DI_REG; regno++) - if (fixed_regs[regno]) - globals++; - - local_regparm - = globals < local_regparm ? local_regparm - globals : 0; - - if (local_regparm > regparm) - regparm = local_regparm; - } - } - - return regparm; -} - -/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and - DFmode (2) arguments in SSE registers for a function with the - indicated TYPE and DECL. DECL may be NULL when calling function - indirectly or considering a libcall. Otherwise return 0. */ - -static int -ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) -{ - gcc_assert (!TARGET_64BIT); - - /* Use SSE registers to pass SFmode and DFmode arguments if requested - by the sseregparm attribute. */ - if (TARGET_SSEREGPARM - || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) - { - if (!TARGET_SSE) - { - if (warn) - { - if (decl) - error ("Calling %qD with attribute sseregparm without " - "SSE/SSE2 enabled", decl); - else - error ("Calling %qT with attribute sseregparm without " - "SSE/SSE2 enabled", type); - } - return 0; - } - - return 2; - } - - /* For local functions, pass up to SSE_REGPARM_MAX SFmode - (and DFmode for SSE2) arguments in SSE registers. */ - if (decl && TARGET_SSE_MATH && optimize && !profile_flag) - { - /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ - struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); - if (i && i->local) - return TARGET_SSE2 ? 2 : 1; - } - - return 0; -} - -/* Return true if EAX is live at the start of the function. Used by - ix86_expand_prologue to determine if we need special help before - calling allocate_stack_worker. */ - -static bool -ix86_eax_live_at_start_p (void) -{ - /* Cheat. Don't bother working forward from ix86_function_regparm - to the function type to whether an actual argument is located in - eax. Instead just look at cfg info, which is still close enough - to correct at this point. This gives false positives for broken - functions that might use uninitialized data that happens to be - allocated in eax, but who cares? */ - return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0); -} - -/* Value is the number of bytes of arguments automatically - popped when returning from a subroutine call. - FUNDECL is the declaration node of the function (as a tree), - FUNTYPE is the data type of the function (as a tree), - or for a library call it is an identifier node for the subroutine name. - SIZE is the number of bytes of arguments passed on the stack. - - On the 80386, the RTD insn may be used to pop them if the number - of args is fixed, but if the number is variable then the caller - must pop them all. RTD can't be used for library calls now - because the library is compiled with the Unix compiler. - Use of RTD is a selectable option, since it is incompatible with - standard Unix calling sequences. If the option is not selected, - the caller must always pop the args. - - The attribute stdcall is equivalent to RTD on a per module basis. */ - -int -ix86_return_pops_args (tree fundecl, tree funtype, int size) -{ - int rtd; - - /* None of the 64-bit ABIs pop arguments. */ - if (TARGET_64BIT) - return 0; - - rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); - - /* Cdecl functions override -mrtd, and never pop the stack. */ - if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) - { - /* Stdcall and fastcall functions will pop the stack if not - variable args. */ - if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) - || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) - rtd = 1; - - if (rtd && ! stdarg_p (funtype)) - return size; - } - - /* Lose any fake structure return argument if it is passed on the stack. */ - if (aggregate_value_p (TREE_TYPE (funtype), fundecl) - && !KEEP_AGGREGATE_RETURN_POINTER) - { - int nregs = ix86_function_regparm (funtype, fundecl); - if (nregs == 0) - return GET_MODE_SIZE (Pmode); - } - - return 0; -} - -/* Argument support functions. */ - -/* Return true when register may be used to pass function parameters. */ -bool -ix86_function_arg_regno_p (int regno) -{ - int i; - const int *parm_regs; - - if (!TARGET_64BIT) - { - if (TARGET_MACHO) - return (regno < REGPARM_MAX - || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); - else - return (regno < REGPARM_MAX - || (TARGET_MMX && MMX_REGNO_P (regno) - && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) - || (TARGET_SSE && SSE_REGNO_P (regno) - && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); - } - - if (TARGET_MACHO) - { - if (SSE_REGNO_P (regno) && TARGET_SSE) - return true; - } - else - { - if (TARGET_SSE && SSE_REGNO_P (regno) - && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) - return true; - } - - /* TODO: The function should depend on current function ABI but - builtins.c would need updating then. Therefore we use the - default ABI. */ - - /* RAX is used as hidden argument to va_arg functions. */ - if (ix86_abi == SYSV_ABI && regno == AX_REG) - return true; - - if (ix86_abi == MS_ABI) - parm_regs = x86_64_ms_abi_int_parameter_registers; - else - parm_regs = x86_64_int_parameter_registers; - for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX - : X86_64_REGPARM_MAX); i++) - if (regno == parm_regs[i]) - return true; - return false; -} - -/* Return if we do not know how to pass TYPE solely in registers. */ - -static bool -ix86_must_pass_in_stack (enum machine_mode mode, const_tree type) -{ - if (must_pass_in_stack_var_size_or_pad (mode, type)) - return true; - - /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! - The layout_type routine is crafty and tries to trick us into passing - currently unsupported vector types on the stack by using TImode. */ - return (!TARGET_64BIT && mode == TImode - && type && TREE_CODE (type) != VECTOR_TYPE); -} - -/* It returns the size, in bytes, of the area reserved for arguments passed - in registers for the function represented by fndecl dependent to the used - abi format. */ -int -ix86_reg_parm_stack_space (const_tree fndecl) -{ - enum calling_abi call_abi = SYSV_ABI; - if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) - call_abi = ix86_function_abi (fndecl); - else - call_abi = ix86_function_type_abi (fndecl); - if (call_abi == MS_ABI) - return 32; - return 0; -} - -/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the - call abi used. */ -enum calling_abi -ix86_function_type_abi (const_tree fntype) -{ - if (TARGET_64BIT && fntype != NULL) - { - enum calling_abi abi = ix86_abi; - if (abi == SYSV_ABI) - { - if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - abi = MS_ABI; - } - else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) - abi = SYSV_ABI; - return abi; - } - return ix86_abi; -} - -static enum calling_abi -ix86_function_abi (const_tree fndecl) -{ - if (! fndecl) - return ix86_abi; - return ix86_function_type_abi (TREE_TYPE (fndecl)); -} - -/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the - call abi used. */ -enum calling_abi -ix86_cfun_abi (void) -{ - if (! cfun || ! TARGET_64BIT) - return ix86_abi; - return cfun->machine->call_abi; -} - -/* regclass.c */ -extern void init_regs (void); - -/* Implementation of call abi switching target hook. Specific to FNDECL - the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE - for more details. */ -void -ix86_call_abi_override (const_tree fndecl) -{ - if (fndecl == NULL_TREE) - cfun->machine->call_abi = ix86_abi; - else - cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl)); -} - -/* MS and SYSV ABI have different set of call used registers. Avoid expensive - re-initialization of init_regs each time we switch function context since - this is needed only during RTL expansion. */ -static void -ix86_maybe_switch_abi (void) -{ - if (TARGET_64BIT && - call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI)) - reinit_regs (); -} - -/* Initialize a variable CUM of type CUMULATIVE_ARGS - for a call to a function whose data type is FNTYPE. - For a library call, FNTYPE is 0. */ - -void -init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ - tree fntype, /* tree ptr for function decl */ - rtx libname, /* SYMBOL_REF of library name or 0 */ - tree fndecl) -{ - struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL; - memset (cum, 0, sizeof (*cum)); - - if (fndecl) - cum->call_abi = ix86_function_abi (fndecl); - else - cum->call_abi = ix86_function_type_abi (fntype); - /* Set up the number of registers to use for passing arguments. */ - - if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS) - sorry ("ms_abi attribute requires -maccumulate-outgoing-args " - "or subtarget optimization implying it"); - cum->nregs = ix86_regparm; - if (TARGET_64BIT) - { - if (cum->call_abi != ix86_abi) - cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX - : X64_REGPARM_MAX; - } - if (TARGET_SSE) - { - cum->sse_nregs = SSE_REGPARM_MAX; - if (TARGET_64BIT) - { - if (cum->call_abi != ix86_abi) - cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX - : X64_SSE_REGPARM_MAX; - } - } - if (TARGET_MMX) - cum->mmx_nregs = MMX_REGPARM_MAX; - cum->warn_avx = true; - cum->warn_sse = true; - cum->warn_mmx = true; - - /* Because type might mismatch in between caller and callee, we need to - use actual type of function for local calls. - FIXME: cgraph_analyze can be told to actually record if function uses - va_start so for local functions maybe_vaarg can be made aggressive - helping K&R code. - FIXME: once typesytem is fixed, we won't need this code anymore. */ - if (i && i->local) - fntype = TREE_TYPE (fndecl); - cum->maybe_vaarg = (fntype - ? (!prototype_p (fntype) || stdarg_p (fntype)) - : !libname); - - if (!TARGET_64BIT) - { - /* If there are variable arguments, then we won't pass anything - in registers in 32-bit mode. */ - if (stdarg_p (fntype)) - { - cum->nregs = 0; - cum->sse_nregs = 0; - cum->mmx_nregs = 0; - cum->warn_avx = 0; - cum->warn_sse = 0; - cum->warn_mmx = 0; - return; - } - - /* Use ecx and edx registers if function has fastcall attribute, - else look for regparm information. */ - if (fntype) - { - if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) - { - cum->nregs = 2; - cum->fastcall = 1; - } - else - cum->nregs = ix86_function_regparm (fntype, fndecl); - } - - /* Set up the number of SSE registers used for passing SFmode - and DFmode arguments. Warn for mismatching ABI. */ - cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); - } -} - -/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. - But in the case of vector types, it is some vector mode. - - When we have only some of our vector isa extensions enabled, then there - are some modes for which vector_mode_supported_p is false. For these - modes, the generic vector support in gcc will choose some non-vector mode - in order to implement the type. By computing the natural mode, we'll - select the proper ABI location for the operand and not depend on whatever - the middle-end decides to do with these vector types. - - The midde-end can't deal with the vector types > 16 bytes. In this - case, we return the original mode and warn ABI change if CUM isn't - NULL. */ - -static enum machine_mode -type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum) -{ - enum machine_mode mode = TYPE_MODE (type); - - if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) - { - HOST_WIDE_INT size = int_size_in_bytes (type); - if ((size == 8 || size == 16 || size == 32) - /* ??? Generic code allows us to create width 1 vectors. Ignore. */ - && TYPE_VECTOR_SUBPARTS (type) > 1) - { - enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); - - if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) - mode = MIN_MODE_VECTOR_FLOAT; - else - mode = MIN_MODE_VECTOR_INT; - - /* Get the mode which has this inner mode and number of units. */ - for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) - if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) - && GET_MODE_INNER (mode) == innermode) - { - if (size == 32 && !TARGET_AVX) - { - static bool warnedavx; - - if (cum - && !warnedavx - && cum->warn_avx) - { - warnedavx = true; - warning (0, "AVX vector argument without AVX " - "enabled changes the ABI"); - } - return TYPE_MODE (type); - } - else - return mode; - } - - gcc_unreachable (); - } - } - - return mode; -} - -/* We want to pass a value in REGNO whose "natural" mode is MODE. However, - this may not agree with the mode that the type system has chosen for the - register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can - go ahead and use it. Otherwise we have to build a PARALLEL instead. */ - -static rtx -gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, - unsigned int regno) -{ - rtx tmp; - - if (orig_mode != BLKmode) - tmp = gen_rtx_REG (orig_mode, regno); - else - { - tmp = gen_rtx_REG (mode, regno); - tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); - tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); - } - - return tmp; -} - -/* x86-64 register passing implementation. See x86-64 ABI for details. Goal - of this code is to classify each 8bytes of incoming argument by the register - class and assign registers accordingly. */ - -/* Return the union class of CLASS1 and CLASS2. - See the x86-64 PS ABI for details. */ - -static enum x86_64_reg_class -merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) -{ - /* Rule #1: If both classes are equal, this is the resulting class. */ - if (class1 == class2) - return class1; - - /* Rule #2: If one of the classes is NO_CLASS, the resulting class is - the other class. */ - if (class1 == X86_64_NO_CLASS) - return class2; - if (class2 == X86_64_NO_CLASS) - return class1; - - /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ - if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) - return X86_64_MEMORY_CLASS; - - /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ - if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) - || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) - return X86_64_INTEGERSI_CLASS; - if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS - || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) - return X86_64_INTEGER_CLASS; - - /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, - MEMORY is used. */ - if (class1 == X86_64_X87_CLASS - || class1 == X86_64_X87UP_CLASS - || class1 == X86_64_COMPLEX_X87_CLASS - || class2 == X86_64_X87_CLASS - || class2 == X86_64_X87UP_CLASS - || class2 == X86_64_COMPLEX_X87_CLASS) - return X86_64_MEMORY_CLASS; - - /* Rule #6: Otherwise class SSE is used. */ - return X86_64_SSE_CLASS; -} - -/* Classify the argument of type TYPE and mode MODE. - CLASSES will be filled by the register class used to pass each word - of the operand. The number of words is returned. In case the parameter - should be passed in memory, 0 is returned. As a special case for zero - sized containers, classes[0] will be NO_CLASS and 1 is returned. - - BIT_OFFSET is used internally for handling records and specifies offset - of the offset in bits modulo 256 to avoid overflow cases. - - See the x86-64 PS ABI for details. -*/ - -static int -classify_argument (enum machine_mode mode, const_tree type, - enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) -{ - HOST_WIDE_INT bytes = - (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); - int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - - /* Variable sized entities are always passed/returned in memory. */ - if (bytes < 0) - return 0; - - if (mode != VOIDmode - && targetm.calls.must_pass_in_stack (mode, type)) - return 0; - - if (type && AGGREGATE_TYPE_P (type)) - { - int i; - tree field; - enum x86_64_reg_class subclasses[MAX_CLASSES]; - - /* On x86-64 we pass structures larger than 32 bytes on the stack. */ - if (bytes > 32) - return 0; - - for (i = 0; i < words; i++) - classes[i] = X86_64_NO_CLASS; - - /* Zero sized arrays or structures are NO_CLASS. We return 0 to - signalize memory class, so handle it as special case. */ - if (!words) - { - classes[0] = X86_64_NO_CLASS; - return 1; - } - - /* Classify each field of record and merge classes. */ - switch (TREE_CODE (type)) - { - case RECORD_TYPE: - /* And now merge the fields of structure. */ - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) - { - if (TREE_CODE (field) == FIELD_DECL) - { - int num; - - if (TREE_TYPE (field) == error_mark_node) - continue; - - /* Bitfields are always classified as integer. Handle them - early, since later code would consider them to be - misaligned integers. */ - if (DECL_BIT_FIELD (field)) - { - for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; - i < ((int_bit_position (field) + (bit_offset % 64)) - + tree_low_cst (DECL_SIZE (field), 0) - + 63) / 8 / 8; i++) - classes[i] = - merge_classes (X86_64_INTEGER_CLASS, - classes[i]); - } - else - { - type = TREE_TYPE (field); - - /* Flexible array member is ignored. */ - if (TYPE_MODE (type) == BLKmode - && TREE_CODE (type) == ARRAY_TYPE - && TYPE_SIZE (type) == NULL_TREE - && TYPE_DOMAIN (type) != NULL_TREE - && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) - == NULL_TREE)) - { - static bool warned; - - if (!warned && warn_psabi) - { - warned = true; - inform (input_location, - "The ABI of passing struct with" - " a flexible array member has" - " changed in GCC 4.4"); - } - continue; - } - num = classify_argument (TYPE_MODE (type), type, - subclasses, - (int_bit_position (field) - + bit_offset) % 256); - if (!num) - return 0; - for (i = 0; i < num; i++) - { - int pos = - (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; - classes[i + pos] = - merge_classes (subclasses[i], classes[i + pos]); - } - } - } - } - break; - - case ARRAY_TYPE: - /* Arrays are handled as small records. */ - { - int num; - num = classify_argument (TYPE_MODE (TREE_TYPE (type)), - TREE_TYPE (type), subclasses, bit_offset); - if (!num) - return 0; - - /* The partial classes are now full classes. */ - if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) - subclasses[0] = X86_64_SSE_CLASS; - if (subclasses[0] == X86_64_INTEGERSI_CLASS - && !((bit_offset % 64) == 0 && bytes == 4)) - subclasses[0] = X86_64_INTEGER_CLASS; - - for (i = 0; i < words; i++) - classes[i] = subclasses[i % num]; - - break; - } - case UNION_TYPE: - case QUAL_UNION_TYPE: - /* Unions are similar to RECORD_TYPE but offset is always 0. - */ - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) - { - if (TREE_CODE (field) == FIELD_DECL) - { - int num; - - if (TREE_TYPE (field) == error_mark_node) - continue; - - num = classify_argument (TYPE_MODE (TREE_TYPE (field)), - TREE_TYPE (field), subclasses, - bit_offset); - if (!num) - return 0; - for (i = 0; i < num; i++) - classes[i] = merge_classes (subclasses[i], classes[i]); - } - } - break; - - default: - gcc_unreachable (); - } - - if (words > 2) - { - /* When size > 16 bytes, if the first one isn't - X86_64_SSE_CLASS or any other ones aren't - X86_64_SSEUP_CLASS, everything should be passed in - memory. */ - if (classes[0] != X86_64_SSE_CLASS) - return 0; - - for (i = 1; i < words; i++) - if (classes[i] != X86_64_SSEUP_CLASS) - return 0; - } - - /* Final merger cleanup. */ - for (i = 0; i < words; i++) - { - /* If one class is MEMORY, everything should be passed in - memory. */ - if (classes[i] == X86_64_MEMORY_CLASS) - return 0; - - /* The X86_64_SSEUP_CLASS should be always preceded by - X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ - if (classes[i] == X86_64_SSEUP_CLASS - && classes[i - 1] != X86_64_SSE_CLASS - && classes[i - 1] != X86_64_SSEUP_CLASS) - { - /* The first one should never be X86_64_SSEUP_CLASS. */ - gcc_assert (i != 0); - classes[i] = X86_64_SSE_CLASS; - } - - /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, - everything should be passed in memory. */ - if (classes[i] == X86_64_X87UP_CLASS - && (classes[i - 1] != X86_64_X87_CLASS)) - { - static bool warned; - - /* The first one should never be X86_64_X87UP_CLASS. */ - gcc_assert (i != 0); - if (!warned && warn_psabi) - { - warned = true; - inform (input_location, - "The ABI of passing union with long double" - " has changed in GCC 4.4"); - } - return 0; - } - } - return words; - } - - /* Compute alignment needed. We align all types to natural boundaries with - exception of XFmode that is aligned to 64bits. */ - if (mode != VOIDmode && mode != BLKmode) - { - int mode_alignment = GET_MODE_BITSIZE (mode); - - if (mode == XFmode) - mode_alignment = 128; - else if (mode == XCmode) - mode_alignment = 256; - if (COMPLEX_MODE_P (mode)) - mode_alignment /= 2; - /* Misaligned fields are always returned in memory. */ - if (bit_offset % mode_alignment) - return 0; - } - - /* for V1xx modes, just use the base mode */ - if (VECTOR_MODE_P (mode) && mode != V1DImode - && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) - mode = GET_MODE_INNER (mode); - - /* Classification of atomic types. */ - switch (mode) - { - case SDmode: - case DDmode: - classes[0] = X86_64_SSE_CLASS; - return 1; - case TDmode: - classes[0] = X86_64_SSE_CLASS; - classes[1] = X86_64_SSEUP_CLASS; - return 2; - case DImode: - case SImode: - case HImode: - case QImode: - case CSImode: - case CHImode: - case CQImode: - { - int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode); - - if (size <= 32) - { - classes[0] = X86_64_INTEGERSI_CLASS; - return 1; - } - else if (size <= 64) - { - classes[0] = X86_64_INTEGER_CLASS; - return 1; - } - else if (size <= 64+32) - { - classes[0] = X86_64_INTEGER_CLASS; - classes[1] = X86_64_INTEGERSI_CLASS; - return 2; - } - else if (size <= 64+64) - { - classes[0] = classes[1] = X86_64_INTEGER_CLASS; - return 2; - } - else - gcc_unreachable (); - } - case CDImode: - case TImode: - classes[0] = classes[1] = X86_64_INTEGER_CLASS; - return 2; - case COImode: - case OImode: - /* OImode shouldn't be used directly. */ - gcc_unreachable (); - case CTImode: - return 0; - case SFmode: - if (!(bit_offset % 64)) - classes[0] = X86_64_SSESF_CLASS; - else - classes[0] = X86_64_SSE_CLASS; - return 1; - case DFmode: - classes[0] = X86_64_SSEDF_CLASS; - return 1; - case XFmode: - classes[0] = X86_64_X87_CLASS; - classes[1] = X86_64_X87UP_CLASS; - return 2; - case TFmode: - classes[0] = X86_64_SSE_CLASS; - classes[1] = X86_64_SSEUP_CLASS; - return 2; - case SCmode: - classes[0] = X86_64_SSE_CLASS; - if (!(bit_offset % 64)) - return 1; - else - { - static bool warned; - - if (!warned && warn_psabi) - { - warned = true; - inform (input_location, - "The ABI of passing structure with complex float" - " member has changed in GCC 4.4"); - } - classes[1] = X86_64_SSESF_CLASS; - return 2; - } - case DCmode: - classes[0] = X86_64_SSEDF_CLASS; - classes[1] = X86_64_SSEDF_CLASS; - return 2; - case XCmode: - classes[0] = X86_64_COMPLEX_X87_CLASS; - return 1; - case TCmode: - /* This modes is larger than 16 bytes. */ - return 0; - case V8SFmode: - case V8SImode: - case V32QImode: - case V16HImode: - case V4DFmode: - case V4DImode: - classes[0] = X86_64_SSE_CLASS; - classes[1] = X86_64_SSEUP_CLASS; - classes[2] = X86_64_SSEUP_CLASS; - classes[3] = X86_64_SSEUP_CLASS; - return 4; - case V4SFmode: - case V4SImode: - case V16QImode: - case V8HImode: - case V2DFmode: - case V2DImode: - classes[0] = X86_64_SSE_CLASS; - classes[1] = X86_64_SSEUP_CLASS; - return 2; - case V1DImode: - case V2SFmode: - case V2SImode: - case V4HImode: - case V8QImode: - classes[0] = X86_64_SSE_CLASS; - return 1; - case BLKmode: - case VOIDmode: - return 0; - default: - gcc_assert (VECTOR_MODE_P (mode)); - - if (bytes > 16) - return 0; - - gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); - - if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) - classes[0] = X86_64_INTEGERSI_CLASS; - else - classes[0] = X86_64_INTEGER_CLASS; - classes[1] = X86_64_INTEGER_CLASS; - return 1 + (bytes > 8); - } -} - -/* Examine the argument and return set number of register required in each - class. Return 0 iff parameter should be passed in memory. */ -static int -examine_argument (enum machine_mode mode, const_tree type, int in_return, - int *int_nregs, int *sse_nregs) -{ - enum x86_64_reg_class regclass[MAX_CLASSES]; - int n = classify_argument (mode, type, regclass, 0); - - *int_nregs = 0; - *sse_nregs = 0; - if (!n) - return 0; - for (n--; n >= 0; n--) - switch (regclass[n]) - { - case X86_64_INTEGER_CLASS: - case X86_64_INTEGERSI_CLASS: - (*int_nregs)++; - break; - case X86_64_SSE_CLASS: - case X86_64_SSESF_CLASS: - case X86_64_SSEDF_CLASS: - (*sse_nregs)++; - break; - case X86_64_NO_CLASS: - case X86_64_SSEUP_CLASS: - break; - case X86_64_X87_CLASS: - case X86_64_X87UP_CLASS: - if (!in_return) - return 0; - break; - case X86_64_COMPLEX_X87_CLASS: - return in_return ? 2 : 0; - case X86_64_MEMORY_CLASS: - gcc_unreachable (); - } - return 1; -} - -/* Construct container for the argument used by GCC interface. See - FUNCTION_ARG for the detailed description. */ - -static rtx -construct_container (enum machine_mode mode, enum machine_mode orig_mode, - const_tree type, int in_return, int nintregs, int nsseregs, - const int *intreg, int sse_regno) -{ - /* The following variables hold the static issued_error state. */ - static bool issued_sse_arg_error; - static bool issued_sse_ret_error; - static bool issued_x87_ret_error; - - enum machine_mode tmpmode; - int bytes = - (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); - enum x86_64_reg_class regclass[MAX_CLASSES]; - int n; - int i; - int nexps = 0; - int needed_sseregs, needed_intregs; - rtx exp[MAX_CLASSES]; - rtx ret; - - n = classify_argument (mode, type, regclass, 0); - if (!n) - return NULL; - if (!examine_argument (mode, type, in_return, &needed_intregs, - &needed_sseregs)) - return NULL; - if (needed_intregs > nintregs || needed_sseregs > nsseregs) - return NULL; - - /* We allowed the user to turn off SSE for kernel mode. Don't crash if - some less clueful developer tries to use floating-point anyway. */ - if (needed_sseregs && !TARGET_SSE) - { - if (in_return) - { - if (!issued_sse_ret_error) - { - error ("SSE register return with SSE disabled"); - issued_sse_ret_error = true; - } - } - else if (!issued_sse_arg_error) - { - error ("SSE register argument with SSE disabled"); - issued_sse_arg_error = true; - } - return NULL; - } - - /* Likewise, error if the ABI requires us to return values in the - x87 registers and the user specified -mno-80387. */ - if (!TARGET_80387 && in_return) - for (i = 0; i < n; i++) - if (regclass[i] == X86_64_X87_CLASS - || regclass[i] == X86_64_X87UP_CLASS - || regclass[i] == X86_64_COMPLEX_X87_CLASS) - { - if (!issued_x87_ret_error) - { - error ("x87 register return with x87 disabled"); - issued_x87_ret_error = true; - } - return NULL; - } - - /* First construct simple cases. Avoid SCmode, since we want to use - single register to pass this type. */ - if (n == 1 && mode != SCmode) - switch (regclass[0]) - { - case X86_64_INTEGER_CLASS: - case X86_64_INTEGERSI_CLASS: - return gen_rtx_REG (mode, intreg[0]); - case X86_64_SSE_CLASS: - case X86_64_SSESF_CLASS: - case X86_64_SSEDF_CLASS: - if (mode != BLKmode) - return gen_reg_or_parallel (mode, orig_mode, - SSE_REGNO (sse_regno)); - break; - case X86_64_X87_CLASS: - case X86_64_COMPLEX_X87_CLASS: - return gen_rtx_REG (mode, FIRST_STACK_REG); - case X86_64_NO_CLASS: - /* Zero sized array, struct or class. */ - return NULL; - default: - gcc_unreachable (); - } - if (n == 2 && regclass[0] == X86_64_SSE_CLASS - && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode) - return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); - if (n == 4 - && regclass[0] == X86_64_SSE_CLASS - && regclass[1] == X86_64_SSEUP_CLASS - && regclass[2] == X86_64_SSEUP_CLASS - && regclass[3] == X86_64_SSEUP_CLASS - && mode != BLKmode) - return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); - - if (n == 2 - && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS) - return gen_rtx_REG (XFmode, FIRST_STACK_REG); - if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS - && regclass[1] == X86_64_INTEGER_CLASS - && (mode == CDImode || mode == TImode || mode == TFmode) - && intreg[0] + 1 == intreg[1]) - return gen_rtx_REG (mode, intreg[0]); - - /* Otherwise figure out the entries of the PARALLEL. */ - for (i = 0; i < n; i++) - { - int pos; - - switch (regclass[i]) - { - case X86_64_NO_CLASS: - break; - case X86_64_INTEGER_CLASS: - case X86_64_INTEGERSI_CLASS: - /* Merge TImodes on aligned occasions here too. */ - if (i * 8 + 8 > bytes) - tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); - else if (regclass[i] == X86_64_INTEGERSI_CLASS) - tmpmode = SImode; - else - tmpmode = DImode; - /* We've requested 24 bytes we don't have mode for. Use DImode. */ - if (tmpmode == BLKmode) - tmpmode = DImode; - exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (tmpmode, *intreg), - GEN_INT (i*8)); - intreg++; - break; - case X86_64_SSESF_CLASS: - exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (SFmode, - SSE_REGNO (sse_regno)), - GEN_INT (i*8)); - sse_regno++; - break; - case X86_64_SSEDF_CLASS: - exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (DFmode, - SSE_REGNO (sse_regno)), - GEN_INT (i*8)); - sse_regno++; - break; - case X86_64_SSE_CLASS: - pos = i; - switch (n) - { - case 1: - tmpmode = DImode; - break; - case 2: - if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) - { - tmpmode = TImode; - i++; - } - else - tmpmode = DImode; - break; - case 4: - gcc_assert (i == 0 - && regclass[1] == X86_64_SSEUP_CLASS - && regclass[2] == X86_64_SSEUP_CLASS - && regclass[3] == X86_64_SSEUP_CLASS); - tmpmode = OImode; - i += 3; - break; - default: - gcc_unreachable (); - } - exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (tmpmode, - SSE_REGNO (sse_regno)), - GEN_INT (pos*8)); - sse_regno++; - break; - default: - gcc_unreachable (); - } - } - - /* Empty aligned struct, union or class. */ - if (nexps == 0) - return NULL; - - ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); - for (i = 0; i < nexps; i++) - XVECEXP (ret, 0, i) = exp [i]; - return ret; -} - -/* Update the data in CUM to advance over an argument of mode MODE - and data type TYPE. (TYPE is null for libcalls where that information - may not be available.) */ - -static void -function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) -{ - switch (mode) - { - default: - break; - - case BLKmode: - if (bytes < 0) - break; - /* FALLTHRU */ - - case DImode: - case SImode: - case HImode: - case QImode: - cum->words += words; - cum->nregs -= words; - cum->regno += words; - - if (cum->nregs <= 0) - { - cum->nregs = 0; - cum->regno = 0; - } - break; - - case OImode: - /* OImode shouldn't be used directly. */ - gcc_unreachable (); - - case DFmode: - if (cum->float_in_sse < 2) - break; - case SFmode: - if (cum->float_in_sse < 1) - break; - /* FALLTHRU */ - - case V8SFmode: - case V8SImode: - case V32QImode: - case V16HImode: - case V4DFmode: - case V4DImode: - case TImode: - case V16QImode: - case V8HImode: - case V4SImode: - case V2DImode: - case V4SFmode: - case V2DFmode: - if (!type || !AGGREGATE_TYPE_P (type)) - { - cum->sse_words += words; - cum->sse_nregs -= 1; - cum->sse_regno += 1; - if (cum->sse_nregs <= 0) - { - cum->sse_nregs = 0; - cum->sse_regno = 0; - } - } - break; - - case V8QImode: - case V4HImode: - case V2SImode: - case V2SFmode: - case V1DImode: - if (!type || !AGGREGATE_TYPE_P (type)) - { - cum->mmx_words += words; - cum->mmx_nregs -= 1; - cum->mmx_regno += 1; - if (cum->mmx_nregs <= 0) - { - cum->mmx_nregs = 0; - cum->mmx_regno = 0; - } - } - break; - } -} - -static void -function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, HOST_WIDE_INT words, int named) -{ - int int_nregs, sse_nregs; - - /* Unnamed 256bit vector mode parameters are passed on stack. */ - if (!named && VALID_AVX256_REG_MODE (mode)) - return; - - if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) - cum->words += words; - else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) - { - cum->nregs -= int_nregs; - cum->sse_nregs -= sse_nregs; - cum->regno += int_nregs; - cum->sse_regno += sse_nregs; - } - else - cum->words += words; -} - -static void -function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, - HOST_WIDE_INT words) -{ - /* Otherwise, this should be passed indirect. */ - gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); - - cum->words += words; - if (cum->nregs > 0) - { - cum->nregs -= 1; - cum->regno += 1; - } -} - -void -function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, int named) -{ - HOST_WIDE_INT bytes, words; - - if (mode == BLKmode) - bytes = int_size_in_bytes (type); - else - bytes = GET_MODE_SIZE (mode); - words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - - if (type) - mode = type_natural_mode (type, NULL); - - if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) - function_arg_advance_ms_64 (cum, bytes, words); - else if (TARGET_64BIT) - function_arg_advance_64 (cum, mode, type, words, named); - else - function_arg_advance_32 (cum, mode, type, bytes, words); -} - -/* Define where to put the arguments to a function. - Value is zero to push the argument on the stack, - or a hard register in which to store the argument. - - MODE is the argument's machine mode. - TYPE is the data type of the argument (as a tree). - This is null for libcalls where that information may - not be available. - CUM is a variable of type CUMULATIVE_ARGS which gives info about - the preceding args and about the function being called. - NAMED is nonzero if this argument is a named parameter - (otherwise it is an extra parameter matching an ellipsis). */ - -static rtx -function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, - enum machine_mode orig_mode, tree type, - HOST_WIDE_INT bytes, HOST_WIDE_INT words) -{ - static bool warnedsse, warnedmmx; - - /* Avoid the AL settings for the Unix64 ABI. */ - if (mode == VOIDmode) - return constm1_rtx; - - switch (mode) - { - default: - break; - - case BLKmode: - if (bytes < 0) - break; - /* FALLTHRU */ - case DImode: - case SImode: - case HImode: - case QImode: - if (words <= cum->nregs) - { - int regno = cum->regno; - - /* Fastcall allocates the first two DWORD (SImode) or - smaller arguments to ECX and EDX if it isn't an - aggregate type . */ - if (cum->fastcall) - { - if (mode == BLKmode - || mode == DImode - || (type && AGGREGATE_TYPE_P (type))) - break; - - /* ECX not EAX is the first allocated register. */ - if (regno == AX_REG) - regno = CX_REG; - } - return gen_rtx_REG (mode, regno); - } - break; - - case DFmode: - if (cum->float_in_sse < 2) - break; - case SFmode: - if (cum->float_in_sse < 1) - break; - /* FALLTHRU */ - case TImode: - /* In 32bit, we pass TImode in xmm registers. */ - case V16QImode: - case V8HImode: - case V4SImode: - case V2DImode: - case V4SFmode: - case V2DFmode: - if (!type || !AGGREGATE_TYPE_P (type)) - { - if (!TARGET_SSE && !warnedsse && cum->warn_sse) - { - warnedsse = true; - warning (0, "SSE vector argument without SSE enabled " - "changes the ABI"); - } - if (cum->sse_nregs) - return gen_reg_or_parallel (mode, orig_mode, - cum->sse_regno + FIRST_SSE_REG); - } - break; - - case OImode: - /* OImode shouldn't be used directly. */ - gcc_unreachable (); - - case V8SFmode: - case V8SImode: - case V32QImode: - case V16HImode: - case V4DFmode: - case V4DImode: - if (!type || !AGGREGATE_TYPE_P (type)) - { - if (cum->sse_nregs) - return gen_reg_or_parallel (mode, orig_mode, - cum->sse_regno + FIRST_SSE_REG); - } - break; - - case V8QImode: - case V4HImode: - case V2SImode: - case V2SFmode: - case V1DImode: - if (!type || !AGGREGATE_TYPE_P (type)) - { - if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) - { - warnedmmx = true; - warning (0, "MMX vector argument without MMX enabled " - "changes the ABI"); - } - if (cum->mmx_nregs) - return gen_reg_or_parallel (mode, orig_mode, - cum->mmx_regno + FIRST_MMX_REG); - } - break; - } - - return NULL_RTX; -} - -static rtx -function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, - enum machine_mode orig_mode, tree type, int named) -{ - /* Handle a hidden AL argument containing number of registers - for varargs x86-64 functions. */ - if (mode == VOIDmode) - return GEN_INT (cum->maybe_vaarg - ? (cum->sse_nregs < 0 - ? (cum->call_abi == ix86_abi - ? SSE_REGPARM_MAX - : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX - : X64_SSE_REGPARM_MAX)) - : cum->sse_regno) - : -1); - - switch (mode) - { - default: - break; - - case V8SFmode: - case V8SImode: - case V32QImode: - case V16HImode: - case V4DFmode: - case V4DImode: - /* Unnamed 256bit vector mode parameters are passed on stack. */ - if (!named) - return NULL; - break; - } - - return construct_container (mode, orig_mode, type, 0, cum->nregs, - cum->sse_nregs, - &x86_64_int_parameter_registers [cum->regno], - cum->sse_regno); -} - -static rtx -function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, - enum machine_mode orig_mode, int named, - HOST_WIDE_INT bytes) -{ - unsigned int regno; - - /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. - We use value of -2 to specify that current function call is MSABI. */ - if (mode == VOIDmode) - return GEN_INT (-2); - - /* If we've run out of registers, it goes on the stack. */ - if (cum->nregs == 0) - return NULL_RTX; - - regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; - - /* Only floating point modes are passed in anything but integer regs. */ - if (TARGET_SSE && (mode == SFmode || mode == DFmode)) - { - if (named) - regno = cum->regno + FIRST_SSE_REG; - else - { - rtx t1, t2; - - /* Unnamed floating parameters are passed in both the - SSE and integer registers. */ - t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); - t2 = gen_rtx_REG (mode, regno); - t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); - t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); - return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); - } - } - /* Handle aggregated types passed in register. */ - if (orig_mode == BLKmode) - { - if (bytes > 0 && bytes <= 8) - mode = (bytes > 4 ? DImode : SImode); - if (mode == BLKmode) - mode = DImode; - } - - return gen_reg_or_parallel (mode, orig_mode, regno); -} - -rtx -function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode, - tree type, int named) -{ - enum machine_mode mode = omode; - HOST_WIDE_INT bytes, words; - - if (mode == BLKmode) - bytes = int_size_in_bytes (type); - else - bytes = GET_MODE_SIZE (mode); - words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - - /* To simplify the code below, represent vector types with a vector mode - even if MMX/SSE are not active. */ - if (type && TREE_CODE (type) == VECTOR_TYPE) - mode = type_natural_mode (type, cum); - - if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) - return function_arg_ms_64 (cum, mode, omode, named, bytes); - else if (TARGET_64BIT) - return function_arg_64 (cum, mode, omode, type, named); - else - return function_arg_32 (cum, mode, omode, type, bytes, words); -} - -/* A C expression that indicates when an argument must be passed by - reference. If nonzero for an argument, a copy of that argument is - made in memory and a pointer to the argument is passed instead of - the argument itself. The pointer is passed in whatever way is - appropriate for passing a pointer to that type. */ - -static bool -ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - const_tree type, bool named ATTRIBUTE_UNUSED) -{ - /* See Windows x64 Software Convention. */ - if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) - { - int msize = (int) GET_MODE_SIZE (mode); - if (type) - { - /* Arrays are passed by reference. */ - if (TREE_CODE (type) == ARRAY_TYPE) - return true; - - if (AGGREGATE_TYPE_P (type)) - { - /* Structs/unions of sizes other than 8, 16, 32, or 64 bits - are passed by reference. */ - msize = int_size_in_bytes (type); - } - } - - /* __m128 is passed by reference. */ - switch (msize) { - case 1: case 2: case 4: case 8: - break; - default: - return true; - } - } - else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1) - return 1; - - return 0; -} - -/* Return true when TYPE should be 128bit aligned for 32bit argument passing - ABI. */ -static bool -contains_aligned_value_p (tree type) -{ - enum machine_mode mode = TYPE_MODE (type); - if (((TARGET_SSE && SSE_REG_MODE_P (mode)) - || mode == TDmode - || mode == TFmode - || mode == TCmode) - && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) - return true; - if (TYPE_ALIGN (type) < 128) - return false; - - if (AGGREGATE_TYPE_P (type)) - { - /* Walk the aggregates recursively. */ - switch (TREE_CODE (type)) - { - case RECORD_TYPE: - case UNION_TYPE: - case QUAL_UNION_TYPE: - { - tree field; - - /* Walk all the structure fields. */ - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) - { - if (TREE_CODE (field) == FIELD_DECL - && contains_aligned_value_p (TREE_TYPE (field))) - return true; - } - break; - } - - case ARRAY_TYPE: - /* Just for use if some languages passes arrays by value. */ - if (contains_aligned_value_p (TREE_TYPE (type))) - return true; - break; - - default: - gcc_unreachable (); - } - } - return false; -} - -/* Gives the alignment boundary, in bits, of an argument with the - specified mode and type. */ - -int -ix86_function_arg_boundary (enum machine_mode mode, tree type) -{ - int align; - if (type) - { - /* Since canonical type is used for call, we convert it to - canonical type if needed. */ - if (!TYPE_STRUCTURAL_EQUALITY_P (type)) - type = TYPE_CANONICAL (type); - align = TYPE_ALIGN (type); - } - else - align = GET_MODE_ALIGNMENT (mode); - if (align < PARM_BOUNDARY) - align = PARM_BOUNDARY; - /* In 32bit, only _Decimal128 and __float128 are aligned to their - natural boundaries. */ - if (!TARGET_64BIT && mode != TDmode && mode != TFmode) - { - /* i386 ABI defines all arguments to be 4 byte aligned. We have to - make an exception for SSE modes since these require 128bit - alignment. - - The handling here differs from field_alignment. ICC aligns MMX - arguments to 4 byte boundaries, while structure fields are aligned - to 8 byte boundaries. */ - if (!type) - { - if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) - align = PARM_BOUNDARY; - } - else - { - if (!contains_aligned_value_p (type)) - align = PARM_BOUNDARY; - } - } - if (align > BIGGEST_ALIGNMENT) - align = BIGGEST_ALIGNMENT; - return align; -} - -/* Return true if N is a possible register number of function value. */ - -bool -ix86_function_value_regno_p (int regno) -{ - switch (regno) - { - case 0: - return true; - - case FIRST_FLOAT_REG: - /* TODO: The function should depend on current function ABI but - builtins.c would need updating then. Therefore we use the - default ABI. */ - if (TARGET_64BIT && ix86_abi == MS_ABI) - return false; - return TARGET_FLOAT_RETURNS_IN_80387; - - case FIRST_SSE_REG: - return TARGET_SSE; - - case FIRST_MMX_REG: - if (TARGET_MACHO || TARGET_64BIT) - return false; - return TARGET_MMX; - } - - return false; -} - -/* Define how to find the value returned by a function. - VALTYPE is the data type of the value (as a tree). - If the precise function being called is known, FUNC is its FUNCTION_DECL; - otherwise, FUNC is 0. */ - -static rtx -function_value_32 (enum machine_mode orig_mode, enum machine_mode mode, - const_tree fntype, const_tree fn) -{ - unsigned int regno; - - /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where - we normally prevent this case when mmx is not available. However - some ABIs may require the result to be returned like DImode. */ - if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) - regno = TARGET_MMX ? FIRST_MMX_REG : 0; - - /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where - we prevent this case when sse is not available. However some ABIs - may require the result to be returned like integer TImode. */ - else if (mode == TImode - || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) - regno = TARGET_SSE ? FIRST_SSE_REG : 0; - - /* 32-byte vector modes in %ymm0. */ - else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) - regno = TARGET_AVX ? FIRST_SSE_REG : 0; - - /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ - else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) - regno = FIRST_FLOAT_REG; - else - /* Most things go in %eax. */ - regno = AX_REG; - - /* Override FP return register with %xmm0 for local functions when - SSE math is enabled or for functions with sseregparm attribute. */ - if ((fn || fntype) && (mode == SFmode || mode == DFmode)) - { - int sse_level = ix86_function_sseregparm (fntype, fn, false); - if ((sse_level >= 1 && mode == SFmode) - || (sse_level == 2 && mode == DFmode)) - regno = FIRST_SSE_REG; - } - - /* OImode shouldn't be used directly. */ - gcc_assert (mode != OImode); - - return gen_rtx_REG (orig_mode, regno); -} - -static rtx -function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, - const_tree valtype) -{ - rtx ret; - - /* Handle libcalls, which don't provide a type node. */ - if (valtype == NULL) - { - switch (mode) - { - case SFmode: - case SCmode: - case DFmode: - case DCmode: - case TFmode: - case SDmode: - case DDmode: - case TDmode: - return gen_rtx_REG (mode, FIRST_SSE_REG); - case XFmode: - case XCmode: - return gen_rtx_REG (mode, FIRST_FLOAT_REG); - case TCmode: - return NULL; - default: - return gen_rtx_REG (mode, AX_REG); - } - } - - ret = construct_container (mode, orig_mode, valtype, 1, - X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, - x86_64_int_return_registers, 0); - - /* For zero sized structures, construct_container returns NULL, but we - need to keep rest of compiler happy by returning meaningful value. */ - if (!ret) - ret = gen_rtx_REG (orig_mode, AX_REG); - - return ret; -} - -static rtx -function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode) -{ - unsigned int regno = AX_REG; - - if (TARGET_SSE) - { - switch (GET_MODE_SIZE (mode)) - { - case 16: - if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) - && !COMPLEX_MODE_P (mode)) - regno = FIRST_SSE_REG; - break; - case 8: - case 4: - if (mode == SFmode || mode == DFmode) - regno = FIRST_SSE_REG; - break; - default: - break; - } - } - return gen_rtx_REG (orig_mode, regno); -} - -static rtx -ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, - enum machine_mode orig_mode, enum machine_mode mode) -{ - const_tree fn, fntype; - - fn = NULL_TREE; - if (fntype_or_decl && DECL_P (fntype_or_decl)) - fn = fntype_or_decl; - fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; - - if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) - return function_value_ms_64 (orig_mode, mode); - else if (TARGET_64BIT) - return function_value_64 (orig_mode, mode, valtype); - else - return function_value_32 (orig_mode, mode, fntype, fn); -} - -static rtx -ix86_function_value (const_tree valtype, const_tree fntype_or_decl, - bool outgoing ATTRIBUTE_UNUSED) -{ - enum machine_mode mode, orig_mode; - - orig_mode = TYPE_MODE (valtype); - mode = type_natural_mode (valtype, NULL); - return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); -} - -rtx -ix86_libcall_value (enum machine_mode mode) -{ - return ix86_function_value_1 (NULL, NULL, mode, mode); -} - -/* Return true iff type is returned in memory. */ - -static int ATTRIBUTE_UNUSED -return_in_memory_32 (const_tree type, enum machine_mode mode) -{ - HOST_WIDE_INT size; - - if (mode == BLKmode) - return 1; - - size = int_size_in_bytes (type); - - if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) - return 0; - - if (VECTOR_MODE_P (mode) || mode == TImode) - { - /* User-created vectors small enough to fit in EAX. */ - if (size < 8) - return 0; - - /* MMX/3dNow values are returned in MM0, - except when it doesn't exits. */ - if (size == 8) - return (TARGET_MMX ? 0 : 1); - - /* SSE values are returned in XMM0, except when it doesn't exist. */ - if (size == 16) - return (TARGET_SSE ? 0 : 1); - - /* AVX values are returned in YMM0, except when it doesn't exist. */ - if (size == 32) - return TARGET_AVX ? 0 : 1; - } - - if (mode == XFmode) - return 0; - - if (size > 12) - return 1; - - /* OImode shouldn't be used directly. */ - gcc_assert (mode != OImode); - - return 0; -} - -static int ATTRIBUTE_UNUSED -return_in_memory_64 (const_tree type, enum machine_mode mode) -{ - int needed_intregs, needed_sseregs; - return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); -} - -static int ATTRIBUTE_UNUSED -return_in_memory_ms_64 (const_tree type, enum machine_mode mode) -{ - HOST_WIDE_INT size = int_size_in_bytes (type); - - /* __m128 is returned in xmm0. */ - if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) - && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16)) - return 0; - - /* Otherwise, the size must be exactly in [1248]. */ - return (size != 1 && size != 2 && size != 4 && size != 8); -} - -static bool -ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) -{ -#ifdef SUBTARGET_RETURN_IN_MEMORY - return SUBTARGET_RETURN_IN_MEMORY (type, fntype); -#else - const enum machine_mode mode = type_natural_mode (type, NULL); - - if (TARGET_64BIT) - { - if (ix86_function_type_abi (fntype) == MS_ABI) - return return_in_memory_ms_64 (type, mode); - else - return return_in_memory_64 (type, mode); - } - else - return return_in_memory_32 (type, mode); -#endif -} - -/* Return false iff TYPE is returned in memory. This version is used - on Solaris 10. It is similar to the generic ix86_return_in_memory, - but differs notably in that when MMX is available, 8-byte vectors - are returned in memory, rather than in MMX registers. */ - -bool -ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) -{ - int size; - enum machine_mode mode = type_natural_mode (type, NULL); - - if (TARGET_64BIT) - return return_in_memory_64 (type, mode); - - if (mode == BLKmode) - return 1; - - size = int_size_in_bytes (type); - - if (VECTOR_MODE_P (mode)) - { - /* Return in memory only if MMX registers *are* available. This - seems backwards, but it is consistent with the existing - Solaris x86 ABI. */ - if (size == 8) - return TARGET_MMX; - if (size == 16) - return !TARGET_SSE; - } - else if (mode == TImode) - return !TARGET_SSE; - else if (mode == XFmode) - return 0; - - return size > 12; -} - -/* When returning SSE vector types, we have a choice of either - (1) being abi incompatible with a -march switch, or - (2) generating an error. - Given no good solution, I think the safest thing is one warning. - The user won't be able to use -Werror, but.... - - Choose the STRUCT_VALUE_RTX hook because that's (at present) only - called in response to actually generating a caller or callee that - uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called - via aggregate_value_p for general type probing from tree-ssa. */ - -static rtx -ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED) -{ - static bool warnedsse, warnedmmx; - - if (!TARGET_64BIT && type) - { - /* Look at the return type of the function, not the function type. */ - enum machine_mode mode = TYPE_MODE (TREE_TYPE (type)); - - if (!TARGET_SSE && !warnedsse) - { - if (mode == TImode - || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) - { - warnedsse = true; - warning (0, "SSE vector return without SSE enabled " - "changes the ABI"); - } - } - - if (!TARGET_MMX && !warnedmmx) - { - if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) - { - warnedmmx = true; - warning (0, "MMX vector return without MMX enabled " - "changes the ABI"); - } - } - } - - return NULL; -} - - -/* Create the va_list data type. */ - -/* Returns the calling convention specific va_list date type. - The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */ - -static tree -ix86_build_builtin_va_list_abi (enum calling_abi abi) -{ - tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; - - /* For i386 we use plain pointer to argument area. */ - if (!TARGET_64BIT || abi == MS_ABI) - return build_pointer_type (char_type_node); - - record = (*lang_hooks.types.make_type) (RECORD_TYPE); - type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); - - f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), - unsigned_type_node); - f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), - unsigned_type_node); - f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), - ptr_type_node); - f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), - ptr_type_node); - - va_list_gpr_counter_field = f_gpr; - va_list_fpr_counter_field = f_fpr; - - DECL_FIELD_CONTEXT (f_gpr) = record; - DECL_FIELD_CONTEXT (f_fpr) = record; - DECL_FIELD_CONTEXT (f_ovf) = record; - DECL_FIELD_CONTEXT (f_sav) = record; - - TREE_CHAIN (record) = type_decl; - TYPE_NAME (record) = type_decl; - TYPE_FIELDS (record) = f_gpr; - TREE_CHAIN (f_gpr) = f_fpr; - TREE_CHAIN (f_fpr) = f_ovf; - TREE_CHAIN (f_ovf) = f_sav; - - layout_type (record); - - /* The correct type is an array type of one element. */ - return build_array_type (record, build_index_type (size_zero_node)); -} - -/* Setup the builtin va_list data type and for 64-bit the additional - calling convention specific va_list data types. */ - -static tree -ix86_build_builtin_va_list (void) -{ - tree ret = ix86_build_builtin_va_list_abi (ix86_abi); - - /* Initialize abi specific va_list builtin types. */ - if (TARGET_64BIT) - { - tree t; - if (ix86_abi == MS_ABI) - { - t = ix86_build_builtin_va_list_abi (SYSV_ABI); - if (TREE_CODE (t) != RECORD_TYPE) - t = build_variant_type_copy (t); - sysv_va_list_type_node = t; - } - else - { - t = ret; - if (TREE_CODE (t) != RECORD_TYPE) - t = build_variant_type_copy (t); - sysv_va_list_type_node = t; - } - if (ix86_abi != MS_ABI) - { - t = ix86_build_builtin_va_list_abi (MS_ABI); - if (TREE_CODE (t) != RECORD_TYPE) - t = build_variant_type_copy (t); - ms_va_list_type_node = t; - } - else - { - t = ret; - if (TREE_CODE (t) != RECORD_TYPE) - t = build_variant_type_copy (t); - ms_va_list_type_node = t; - } - } - - return ret; -} - -/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ - -static void -setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) -{ - rtx save_area, mem; - rtx label; - rtx label_ref; - rtx tmp_reg; - rtx nsse_reg; - alias_set_type set; - int i; - int regparm = ix86_regparm; - - if (cum->call_abi != ix86_abi) - regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX; - - /* GPR size of varargs save area. */ - if (cfun->va_list_gpr_size) - ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; - else - ix86_varargs_gpr_size = 0; - - /* FPR size of varargs save area. We don't need it if we don't pass - anything in SSE registers. */ - if (cum->sse_nregs && cfun->va_list_fpr_size) - ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; - else - ix86_varargs_fpr_size = 0; - - if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) - return; - - save_area = frame_pointer_rtx; - set = get_varargs_alias_set (); - - for (i = cum->regno; - i < regparm - && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; - i++) - { - mem = gen_rtx_MEM (Pmode, - plus_constant (save_area, i * UNITS_PER_WORD)); - MEM_NOTRAP_P (mem) = 1; - set_mem_alias_set (mem, set); - emit_move_insn (mem, gen_rtx_REG (Pmode, - x86_64_int_parameter_registers[i])); - } - - if (ix86_varargs_fpr_size) - { - /* Now emit code to save SSE registers. The AX parameter contains number - of SSE parameter registers used to call this function. We use - sse_prologue_save insn template that produces computed jump across - SSE saves. We need some preparation work to get this working. */ - - label = gen_label_rtx (); - label_ref = gen_rtx_LABEL_REF (Pmode, label); - - /* Compute address to jump to : - label - eax*4 + nnamed_sse_arguments*4 Or - label - eax*5 + nnamed_sse_arguments*5 for AVX. */ - tmp_reg = gen_reg_rtx (Pmode); - nsse_reg = gen_reg_rtx (Pmode); - emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG))); - emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, - gen_rtx_MULT (Pmode, nsse_reg, - GEN_INT (4)))); - - /* vmovaps is one byte longer than movaps. */ - if (TARGET_AVX) - emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, - gen_rtx_PLUS (Pmode, tmp_reg, - nsse_reg))); - - if (cum->sse_regno) - emit_move_insn - (nsse_reg, - gen_rtx_CONST (DImode, - gen_rtx_PLUS (DImode, - label_ref, - GEN_INT (cum->sse_regno - * (TARGET_AVX ? 5 : 4))))); - else - emit_move_insn (nsse_reg, label_ref); - emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); - - /* Compute address of memory block we save into. We always use pointer - pointing 127 bytes after first byte to store - this is needed to keep - instruction size limited by 4 bytes (5 bytes for AVX) with one - byte displacement. */ - tmp_reg = gen_reg_rtx (Pmode); - emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, - plus_constant (save_area, - ix86_varargs_gpr_size + 127))); - mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); - MEM_NOTRAP_P (mem) = 1; - set_mem_alias_set (mem, set); - set_mem_align (mem, BITS_PER_WORD); - - /* And finally do the dirty job! */ - emit_insn (gen_sse_prologue_save (mem, nsse_reg, - GEN_INT (cum->sse_regno), label)); - } -} - -static void -setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) -{ - alias_set_type set = get_varargs_alias_set (); - int i; - - for (i = cum->regno; i < X64_REGPARM_MAX; i++) - { - rtx reg, mem; - - mem = gen_rtx_MEM (Pmode, - plus_constant (virtual_incoming_args_rtx, - i * UNITS_PER_WORD)); - MEM_NOTRAP_P (mem) = 1; - set_mem_alias_set (mem, set); - - reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); - emit_move_insn (mem, reg); - } -} - -static void -ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, int *pretend_size ATTRIBUTE_UNUSED, - int no_rtl) -{ - CUMULATIVE_ARGS next_cum; - tree fntype; - - /* This argument doesn't appear to be used anymore. Which is good, - because the old code here didn't suppress rtl generation. */ - gcc_assert (!no_rtl); - - if (!TARGET_64BIT) - return; - - fntype = TREE_TYPE (current_function_decl); - - /* For varargs, we do not want to skip the dummy va_dcl argument. - For stdargs, we do want to skip the last named argument. */ - next_cum = *cum; - if (stdarg_p (fntype)) - function_arg_advance (&next_cum, mode, type, 1); - - if (cum->call_abi == MS_ABI) - setup_incoming_varargs_ms_64 (&next_cum); - else - setup_incoming_varargs_64 (&next_cum); -} - -/* Checks if TYPE is of kind va_list char *. */ - -static bool -is_va_list_char_pointer (tree type) -{ - tree canonic; - - /* For 32-bit it is always true. */ - if (!TARGET_64BIT) - return true; - canonic = ix86_canonical_va_list_type (type); - return (canonic == ms_va_list_type_node - || (ix86_abi == MS_ABI && canonic == va_list_type_node)); -} - -/* Implement va_start. */ - -static void -ix86_va_start (tree valist, rtx nextarg) -{ - HOST_WIDE_INT words, n_gpr, n_fpr; - tree f_gpr, f_fpr, f_ovf, f_sav; - tree gpr, fpr, ovf, sav, t; - tree type; - - /* Only 64bit target needs something special. */ - if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) - { - std_expand_builtin_va_start (valist, nextarg); - return; - } - - f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); - f_fpr = TREE_CHAIN (f_gpr); - f_ovf = TREE_CHAIN (f_fpr); - f_sav = TREE_CHAIN (f_ovf); - - valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); - gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); - fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); - ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); - sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); - - /* Count number of gp and fp argument registers used. */ - words = crtl->args.info.words; - n_gpr = crtl->args.info.regno; - n_fpr = crtl->args.info.sse_regno; - - if (cfun->va_list_gpr_size) - { - type = TREE_TYPE (gpr); - t = build2 (MODIFY_EXPR, type, - gpr, build_int_cst (type, n_gpr * 8)); - TREE_SIDE_EFFECTS (t) = 1; - expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - } - - if (TARGET_SSE && cfun->va_list_fpr_size) - { - type = TREE_TYPE (fpr); - t = build2 (MODIFY_EXPR, type, fpr, - build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); - TREE_SIDE_EFFECTS (t) = 1; - expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - } - - /* Find the overflow area. */ - type = TREE_TYPE (ovf); - t = make_tree (type, crtl->args.internal_arg_pointer); - if (words != 0) - t = build2 (POINTER_PLUS_EXPR, type, t, - size_int (words * UNITS_PER_WORD)); - t = build2 (MODIFY_EXPR, type, ovf, t); - TREE_SIDE_EFFECTS (t) = 1; - expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - - if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) - { - /* Find the register save area. - Prologue of the function save it right above stack frame. */ - type = TREE_TYPE (sav); - t = make_tree (type, frame_pointer_rtx); - if (!ix86_varargs_gpr_size) - t = build2 (POINTER_PLUS_EXPR, type, t, - size_int (-8 * X86_64_REGPARM_MAX)); - t = build2 (MODIFY_EXPR, type, sav, t); - TREE_SIDE_EFFECTS (t) = 1; - expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - } -} - -/* Implement va_arg. */ - -static tree -ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - gimple_seq *post_p) -{ - static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; - tree f_gpr, f_fpr, f_ovf, f_sav; - tree gpr, fpr, ovf, sav, t; - int size, rsize; - tree lab_false, lab_over = NULL_TREE; - tree addr, t2; - rtx container; - int indirect_p = 0; - tree ptrtype; - enum machine_mode nat_mode; - int arg_boundary; - - /* Only 64bit target needs something special. */ - if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) - return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); - - f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); - f_fpr = TREE_CHAIN (f_gpr); - f_ovf = TREE_CHAIN (f_fpr); - f_sav = TREE_CHAIN (f_ovf); - - gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), - build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE); - valist = build_va_arg_indirect_ref (valist); - fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); - ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); - sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); - - indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); - if (indirect_p) - type = build_pointer_type (type); - size = int_size_in_bytes (type); - rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - - nat_mode = type_natural_mode (type, NULL); - switch (nat_mode) - { - case V8SFmode: - case V8SImode: - case V32QImode: - case V16HImode: - case V4DFmode: - case V4DImode: - /* Unnamed 256bit vector mode parameters are passed on stack. */ - if (ix86_cfun_abi () == SYSV_ABI) - { - container = NULL; - break; - } - - default: - container = construct_container (nat_mode, TYPE_MODE (type), - type, 0, X86_64_REGPARM_MAX, - X86_64_SSE_REGPARM_MAX, intreg, - 0); - break; - } - - /* Pull the value out of the saved registers. */ - - addr = create_tmp_var (ptr_type_node, "addr"); - DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); - - if (container) - { - int needed_intregs, needed_sseregs; - bool need_temp; - tree int_addr, sse_addr; - - lab_false = create_artificial_label (); - lab_over = create_artificial_label (); - - examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); - - need_temp = (!REG_P (container) - && ((needed_intregs && TYPE_ALIGN (type) > 64) - || TYPE_ALIGN (type) > 128)); - - /* In case we are passing structure, verify that it is consecutive block - on the register save area. If not we need to do moves. */ - if (!need_temp && !REG_P (container)) - { - /* Verify that all registers are strictly consecutive */ - if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) - { - int i; - - for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) - { - rtx slot = XVECEXP (container, 0, i); - if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i - || INTVAL (XEXP (slot, 1)) != i * 16) - need_temp = 1; - } - } - else - { - int i; - - for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) - { - rtx slot = XVECEXP (container, 0, i); - if (REGNO (XEXP (slot, 0)) != (unsigned int) i - || INTVAL (XEXP (slot, 1)) != i * 8) - need_temp = 1; - } - } - } - if (!need_temp) - { - int_addr = addr; - sse_addr = addr; - } - else - { - int_addr = create_tmp_var (ptr_type_node, "int_addr"); - DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); - sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); - DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); - } - - /* First ensure that we fit completely in registers. */ - if (needed_intregs) - { - t = build_int_cst (TREE_TYPE (gpr), - (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); - t = build2 (GE_EXPR, boolean_type_node, gpr, t); - t2 = build1 (GOTO_EXPR, void_type_node, lab_false); - t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); - gimplify_and_add (t, pre_p); - } - if (needed_sseregs) - { - t = build_int_cst (TREE_TYPE (fpr), - (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 - + X86_64_REGPARM_MAX * 8); - t = build2 (GE_EXPR, boolean_type_node, fpr, t); - t2 = build1 (GOTO_EXPR, void_type_node, lab_false); - t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); - gimplify_and_add (t, pre_p); - } - - /* Compute index to start of area used for integer regs. */ - if (needed_intregs) - { - /* int_addr = gpr + sav; */ - t = fold_convert (sizetype, gpr); - t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); - gimplify_assign (int_addr, t, pre_p); - } - if (needed_sseregs) - { - /* sse_addr = fpr + sav; */ - t = fold_convert (sizetype, fpr); - t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); - gimplify_assign (sse_addr, t, pre_p); - } - if (need_temp) - { - int i, prev_size = 0; - tree temp = create_tmp_var (type, "va_arg_tmp"); - - /* addr = &temp; */ - t = build1 (ADDR_EXPR, build_pointer_type (type), temp); - gimplify_assign (addr, t, pre_p); - - for (i = 0; i < XVECLEN (container, 0); i++) - { - rtx slot = XVECEXP (container, 0, i); - rtx reg = XEXP (slot, 0); - enum machine_mode mode = GET_MODE (reg); - tree piece_type; - tree addr_type; - tree daddr_type; - tree src_addr, src; - int src_offset; - tree dest_addr, dest; - int cur_size = GET_MODE_SIZE (mode); - - if (prev_size + cur_size > size) - { - cur_size = size - prev_size; - mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1); - if (mode == BLKmode) - mode = QImode; - } - piece_type = lang_hooks.types.type_for_mode (mode, 1); - if (mode == GET_MODE (reg)) - addr_type = build_pointer_type (piece_type); - else - addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, - true); - daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, - true); - - if (SSE_REGNO_P (REGNO (reg))) - { - src_addr = sse_addr; - src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; - } - else - { - src_addr = int_addr; - src_offset = REGNO (reg) * 8; - } - src_addr = fold_convert (addr_type, src_addr); - src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr, - size_int (src_offset)); - - dest_addr = fold_convert (daddr_type, addr); - dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr, - size_int (INTVAL (XEXP (slot, 1)))); - if (cur_size == GET_MODE_SIZE (mode)) - { - src = build_va_arg_indirect_ref (src_addr); - dest = build_va_arg_indirect_ref (dest_addr); - - gimplify_assign (dest, src, pre_p); - } - else - { - tree copy - = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY], - 3, dest_addr, src_addr, - size_int (cur_size)); - gimplify_and_add (copy, pre_p); - } - prev_size += cur_size; - } - } - - if (needed_intregs) - { - t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, - build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); - gimplify_assign (gpr, t, pre_p); - } - - if (needed_sseregs) - { - t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, - build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); - gimplify_assign (fpr, t, pre_p); - } - - gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); - - gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); - } - - /* ... otherwise out of the overflow area. */ - - /* When we align parameter on stack for caller, if the parameter - alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be - aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee - here with caller. */ - arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type); - if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) - arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; - - /* Care for on-stack alignment if needed. */ - if (arg_boundary <= 64 - || integer_zerop (TYPE_SIZE (type))) - t = ovf; - else - { - HOST_WIDE_INT align = arg_boundary / 8; - t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf, - size_int (align - 1)); - t = fold_convert (sizetype, t); - t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, - size_int (-align)); - t = fold_convert (TREE_TYPE (ovf), t); - } - gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); - gimplify_assign (addr, t, pre_p); - - t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, - size_int (rsize * UNITS_PER_WORD)); - gimplify_assign (unshare_expr (ovf), t, pre_p); - - if (container) - gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); - - ptrtype = build_pointer_type (type); - addr = fold_convert (ptrtype, addr); - - if (indirect_p) - addr = build_va_arg_indirect_ref (addr); - return build_va_arg_indirect_ref (addr); -} - -/* Return nonzero if OPNUM's MEM should be matched - in movabs* patterns. */ - -int -ix86_check_movabs (rtx insn, int opnum) -{ - rtx set, mem; - - set = PATTERN (insn); - if (GET_CODE (set) == PARALLEL) - set = XVECEXP (set, 0, 0); - gcc_assert (GET_CODE (set) == SET); - mem = XEXP (set, opnum); - while (GET_CODE (mem) == SUBREG) - mem = SUBREG_REG (mem); - gcc_assert (MEM_P (mem)); - return (volatile_ok || !MEM_VOLATILE_P (mem)); -} - -/* Initialize the table of extra 80387 mathematical constants. */ - -static void -init_ext_80387_constants (void) -{ - static const char * cst[5] = - { - "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ - "0.6931471805599453094286904741849753009", /* 1: fldln2 */ - "1.4426950408889634073876517827983434472", /* 2: fldl2e */ - "3.3219280948873623478083405569094566090", /* 3: fldl2t */ - "3.1415926535897932385128089594061862044", /* 4: fldpi */ - }; - int i; - - for (i = 0; i < 5; i++) - { - real_from_string (&ext_80387_constants_table[i], cst[i]); - /* Ensure each constant is rounded to XFmode precision. */ - real_convert (&ext_80387_constants_table[i], - XFmode, &ext_80387_constants_table[i]); - } - - ext_80387_constants_init = 1; -} - -/* Return true if the constant is something that can be loaded with - a special instruction. */ - -int -standard_80387_constant_p (rtx x) -{ - enum machine_mode mode = GET_MODE (x); - - REAL_VALUE_TYPE r; - - if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE))) - return -1; - - if (x == CONST0_RTX (mode)) - return 1; - if (x == CONST1_RTX (mode)) - return 2; - - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - - /* For XFmode constants, try to find a special 80387 instruction when - optimizing for size or on those CPUs that benefit from them. */ - if (mode == XFmode - && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) - { - int i; - - if (! ext_80387_constants_init) - init_ext_80387_constants (); - - for (i = 0; i < 5; i++) - if (real_identical (&r, &ext_80387_constants_table[i])) - return i + 3; - } - - /* Load of the constant -0.0 or -1.0 will be split as - fldz;fchs or fld1;fchs sequence. */ - if (real_isnegzero (&r)) - return 8; - if (real_identical (&r, &dconstm1)) - return 9; - - return 0; -} - -/* Return the opcode of the special instruction to be used to load - the constant X. */ - -const char * -standard_80387_constant_opcode (rtx x) -{ - switch (standard_80387_constant_p (x)) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - case 3: - return "fldlg2"; - case 4: - return "fldln2"; - case 5: - return "fldl2e"; - case 6: - return "fldl2t"; - case 7: - return "fldpi"; - case 8: - case 9: - return "#"; - default: - gcc_unreachable (); - } -} - -/* Return the CONST_DOUBLE representing the 80387 constant that is - loaded by the specified special instruction. The argument IDX - matches the return value from standard_80387_constant_p. */ - -rtx -standard_80387_constant_rtx (int idx) -{ - int i; - - if (! ext_80387_constants_init) - init_ext_80387_constants (); - - switch (idx) - { - case 3: - case 4: - case 5: - case 6: - case 7: - i = idx - 3; - break; - - default: - gcc_unreachable (); - } - - return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], - XFmode); -} - -/* Return 1 if X is all 0s and 2 if x is all 1s - in supported SSE vector mode. */ - -int -standard_sse_constant_p (rtx x) -{ - enum machine_mode mode = GET_MODE (x); - - if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) - return 1; - if (vector_all_ones_operand (x, mode)) - switch (mode) - { - case V16QImode: - case V8HImode: - case V4SImode: - case V2DImode: - if (TARGET_SSE2) - return 2; - default: - break; - } - - return 0; -} - -/* Return the opcode of the special instruction to be used to load - the constant X. */ - -const char * -standard_sse_constant_opcode (rtx insn, rtx x) -{ - switch (standard_sse_constant_p (x)) - { - case 1: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; - case MODE_V2DF: - return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0"; - case MODE_TI: - return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0"; - case MODE_V8SF: - return "vxorps\t%x0, %x0, %x0"; - case MODE_V4DF: - return "vxorpd\t%x0, %x0, %x0"; - case MODE_OI: - return "vpxor\t%x0, %x0, %x0"; - default: - break; - } - case 2: - return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0"; - default: - break; - } - gcc_unreachable (); -} - -/* Returns 1 if OP contains a symbol reference */ - -int -symbolic_reference_mentioned_p (rtx op) -{ - const char *fmt; - int i; - - if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) - return 1; - - fmt = GET_RTX_FORMAT (GET_CODE (op)); - for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) - { - if (fmt[i] == 'E') - { - int j; - - for (j = XVECLEN (op, i) - 1; j >= 0; j--) - if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) - return 1; - } - - else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) - return 1; - } - - return 0; -} - -/* Return 1 if it is appropriate to emit `ret' instructions in the - body of a function. Do this only if the epilogue is simple, needing a - couple of insns. Prior to reloading, we can't tell how many registers - must be saved, so return 0 then. Return 0 if there is no frame - marker to de-allocate. */ - -int -ix86_can_use_return_insn_p (void) -{ - struct ix86_frame frame; - - if (! reload_completed || frame_pointer_needed) - return 0; - - /* Don't allow more than 32 pop, since that's all we can do - with one instruction. */ - if (crtl->args.pops_args - && crtl->args.size >= 32768) - return 0; - - ix86_compute_frame_layout (&frame); - return frame.to_allocate == 0 && frame.padding0 == 0 - && (frame.nregs + frame.nsseregs) == 0; -} - -/* Value should be nonzero if functions must have frame pointers. - Zero means the frame pointer need not be set up (and parms may - be accessed via the stack pointer) in functions that seem suitable. */ - -int -ix86_frame_pointer_required (void) -{ - /* If we accessed previous frames, then the generated code expects - to be able to access the saved ebp value in our frame. */ - if (cfun->machine->accesses_prev_frame) - return 1; - - /* Several x86 os'es need a frame pointer for other reasons, - usually pertaining to setjmp. */ - if (SUBTARGET_FRAME_POINTER_REQUIRED) - return 1; - - /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off - the frame pointer by default. Turn it back on now if we've not - got a leaf function. */ - if (TARGET_OMIT_LEAF_FRAME_POINTER - && (!current_function_is_leaf - || ix86_current_function_calls_tls_descriptor)) - return 1; - - if (crtl->profile) - return 1; - - return 0; -} - -/* Record that the current function accesses previous call frames. */ - -void -ix86_setup_frame_addresses (void) -{ - cfun->machine->accesses_prev_frame = 1; -} - -#ifndef USE_HIDDEN_LINKONCE -# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO -# define USE_HIDDEN_LINKONCE 1 -# else -# define USE_HIDDEN_LINKONCE 0 -# endif -#endif - -static int pic_labels_used; - -/* Fills in the label name that should be used for a pc thunk for - the given register. */ - -static void -get_pc_thunk_name (char name[32], unsigned int regno) -{ - gcc_assert (!TARGET_64BIT); - - if (USE_HIDDEN_LINKONCE) - sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); - else - ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); -} - - -/* This function generates code for -fpic that loads %ebx with - the return address of the caller and then returns. */ - -void -ix86_file_end (void) -{ - rtx xops[2]; - int regno; - - for (regno = 0; regno < 8; ++regno) - { - char name[32]; - - if (! ((pic_labels_used >> regno) & 1)) - continue; - - get_pc_thunk_name (name, regno); - -#if TARGET_MACHO - if (TARGET_MACHO) - { - switch_to_section (darwin_sections[text_coal_section]); - fputs ("\t.weak_definition\t", asm_out_file); - assemble_name (asm_out_file, name); - fputs ("\n\t.private_extern\t", asm_out_file); - assemble_name (asm_out_file, name); - fputs ("\n", asm_out_file); - ASM_OUTPUT_LABEL (asm_out_file, name); - } - else -#endif - if (USE_HIDDEN_LINKONCE) - { - tree decl; - - decl = build_decl (FUNCTION_DECL, get_identifier (name), - error_mark_node); - TREE_PUBLIC (decl) = 1; - TREE_STATIC (decl) = 1; - DECL_ONE_ONLY (decl) = 1; - - (*targetm.asm_out.unique_section) (decl, 0); - switch_to_section (get_named_section (decl, NULL, 0)); - - (*targetm.asm_out.globalize_label) (asm_out_file, name); - fputs ("\t.hidden\t", asm_out_file); - assemble_name (asm_out_file, name); - fputc ('\n', asm_out_file); - ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); - } - else - { - switch_to_section (text_section); - ASM_OUTPUT_LABEL (asm_out_file, name); - } - - xops[0] = gen_rtx_REG (Pmode, regno); - xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); - /* Pad stack IP move with 4 instructions. 2 NOPs count as 1 - instruction. */ - if (TARGET_PAD_SHORT_FUNCTION) - output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop", - xops); - output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); - output_asm_insn ("ret", xops); - } - - if (NEED_INDICATE_EXEC_STACK) - file_end_indicate_exec_stack (); -} - -/* Emit code for the SET_GOT patterns. */ - -const char * -output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) -{ - rtx xops[3]; - - xops[0] = dest; - - if (TARGET_VXWORKS_RTP && flag_pic) - { - /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ - xops[2] = gen_rtx_MEM (Pmode, - gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); - output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); - - /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. - Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as - an unadorned address. */ - xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); - SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; - output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); - return ""; - } - - xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); - - if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) - { - xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); - - if (!flag_pic) - output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); - else - output_asm_insn ("call\t%a2", xops); - -#if TARGET_MACHO - /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This - is what will be referenced by the Mach-O PIC subsystem. */ - if (!label) - ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); -#endif - - (*targetm.asm_out.internal_label) (asm_out_file, "L", - CODE_LABEL_NUMBER (XEXP (xops[2], 0))); - - if (flag_pic) - output_asm_insn ("pop%z0\t%0", xops); - } - else - { - char name[32]; - get_pc_thunk_name (name, REGNO (dest)); - pic_labels_used |= 1 << REGNO (dest); - - xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); - xops[2] = gen_rtx_MEM (QImode, xops[2]); - output_asm_insn ("call\t%X2", xops); - /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This - is what will be referenced by the Mach-O PIC subsystem. */ -#if TARGET_MACHO - if (!label) - ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); - else - targetm.asm_out.internal_label (asm_out_file, "L", - CODE_LABEL_NUMBER (label)); -#endif - } - - if (TARGET_MACHO) - return ""; - - if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) - output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); - else - output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); - - return ""; -} - -/* Generate an "push" pattern for input ARG. */ - -static rtx -gen_push (rtx arg) -{ - if (ix86_cfa_state->reg == stack_pointer_rtx) - ix86_cfa_state->offset += UNITS_PER_WORD; - - return gen_rtx_SET (VOIDmode, - gen_rtx_MEM (Pmode, - gen_rtx_PRE_DEC (Pmode, - stack_pointer_rtx)), - arg); -} - -/* Return >= 0 if there is an unused call-clobbered register available - for the entire function. */ - -static unsigned int -ix86_select_alt_pic_regnum (void) -{ - if (current_function_is_leaf && !crtl->profile - && !ix86_current_function_calls_tls_descriptor) - { - int i, drap; - /* Can't use the same register for both PIC and DRAP. */ - if (crtl->drap_reg) - drap = REGNO (crtl->drap_reg); - else - drap = -1; - for (i = 2; i >= 0; --i) - if (i != drap && !df_regs_ever_live_p (i)) - return i; - } - - return INVALID_REGNUM; -} - -/* Return 1 if we need to save REGNO. */ -static int -ix86_save_reg (unsigned int regno, int maybe_eh_return) -{ - if (pic_offset_table_rtx - && regno == REAL_PIC_OFFSET_TABLE_REGNUM - && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) - || crtl->profile - || crtl->calls_eh_return - || crtl->uses_const_pool)) - { - if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) - return 0; - return 1; - } - - if (crtl->calls_eh_return && maybe_eh_return) - { - unsigned i; - for (i = 0; ; i++) - { - unsigned test = EH_RETURN_DATA_REGNO (i); - if (test == INVALID_REGNUM) - break; - if (test == regno) - return 1; - } - } - - if (crtl->drap_reg && regno == REGNO (crtl->drap_reg)) - return 1; - - return (df_regs_ever_live_p (regno) - && !call_used_regs[regno] - && !fixed_regs[regno] - && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); -} - -/* Return number of saved general prupose registers. */ - -static int -ix86_nsaved_regs (void) -{ - int nregs = 0; - int regno; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) - nregs ++; - return nregs; -} - -/* Return number of saved SSE registrers. */ - -static int -ix86_nsaved_sseregs (void) -{ - int nregs = 0; - int regno; - - if (ix86_cfun_abi () != MS_ABI) - return 0; - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) - nregs ++; - return nregs; -} - -/* Given FROM and TO register numbers, say whether this elimination is - allowed. If stack alignment is needed, we can only replace argument - pointer with hard frame pointer, or replace frame pointer with stack - pointer. Otherwise, frame pointer elimination is automatically - handled and all other eliminations are valid. */ - -int -ix86_can_eliminate (int from, int to) -{ - if (stack_realign_fp) - return ((from == ARG_POINTER_REGNUM - && to == HARD_FRAME_POINTER_REGNUM) - || (from == FRAME_POINTER_REGNUM - && to == STACK_POINTER_REGNUM)); - else - return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1; -} - -/* Return the offset between two registers, one to be eliminated, and the other - its replacement, at the start of a routine. */ - -HOST_WIDE_INT -ix86_initial_elimination_offset (int from, int to) -{ - struct ix86_frame frame; - ix86_compute_frame_layout (&frame); - - if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) - return frame.hard_frame_pointer_offset; - else if (from == FRAME_POINTER_REGNUM - && to == HARD_FRAME_POINTER_REGNUM) - return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; - else - { - gcc_assert (to == STACK_POINTER_REGNUM); - - if (from == ARG_POINTER_REGNUM) - return frame.stack_pointer_offset; - - gcc_assert (from == FRAME_POINTER_REGNUM); - return frame.stack_pointer_offset - frame.frame_pointer_offset; - } -} - -/* In a dynamically-aligned function, we can't know the offset from - stack pointer to frame pointer, so we must ensure that setjmp - eliminates fp against the hard fp (%ebp) rather than trying to - index from %esp up to the top of the frame across a gap that is - of unknown (at compile-time) size. */ -static rtx -ix86_builtin_setjmp_frame_value (void) -{ - return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; -} - -/* Fill structure ix86_frame about frame of currently computed function. */ - -static void -ix86_compute_frame_layout (struct ix86_frame *frame) -{ - HOST_WIDE_INT total_size; - unsigned int stack_alignment_needed; - HOST_WIDE_INT offset; - unsigned int preferred_alignment; - HOST_WIDE_INT size = get_frame_size (); - - frame->nregs = ix86_nsaved_regs (); - frame->nsseregs = ix86_nsaved_sseregs (); - total_size = size; - - stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; - preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; - - /* MS ABI seem to require stack alignment to be always 16 except for function - prologues. */ - if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16) - { - preferred_alignment = 16; - stack_alignment_needed = 16; - crtl->preferred_stack_boundary = 128; - crtl->stack_alignment_needed = 128; - } - - gcc_assert (!size || stack_alignment_needed); - gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); - gcc_assert (preferred_alignment <= stack_alignment_needed); - - /* During reload iteration the amount of registers saved can change. - Recompute the value as needed. Do not recompute when amount of registers - didn't change as reload does multiple calls to the function and does not - expect the decision to change within single iteration. */ - if (!optimize_function_for_size_p (cfun) - && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) - { - int count = frame->nregs; - - cfun->machine->use_fast_prologue_epilogue_nregs = count; - /* The fast prologue uses move instead of push to save registers. This - is significantly longer, but also executes faster as modern hardware - can execute the moves in parallel, but can't do that for push/pop. - - Be careful about choosing what prologue to emit: When function takes - many instructions to execute we may use slow version as well as in - case function is known to be outside hot spot (this is known with - feedback only). Weight the size of function by number of registers - to save as it is cheap to use one or two push instructions but very - slow to use many of them. */ - if (count) - count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; - if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL - || (flag_branch_probabilities - && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) - cfun->machine->use_fast_prologue_epilogue = false; - else - cfun->machine->use_fast_prologue_epilogue - = !expensive_function_p (count); - } - if (TARGET_PROLOGUE_USING_MOVE - && cfun->machine->use_fast_prologue_epilogue) - frame->save_regs_using_mov = true; - else - frame->save_regs_using_mov = false; - - - /* Skip return address and saved base pointer. */ - offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; - - frame->hard_frame_pointer_offset = offset; - - /* Set offset to aligned because the realigned frame starts from - here. */ - if (stack_realign_fp) - offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed; - - /* Register save area */ - offset += frame->nregs * UNITS_PER_WORD; - - /* Align SSE reg save area. */ - if (frame->nsseregs) - frame->padding0 = ((offset + 16 - 1) & -16) - offset; - else - frame->padding0 = 0; - - /* SSE register save area. */ - offset += frame->padding0 + frame->nsseregs * 16; - - /* Va-arg area */ - frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; - offset += frame->va_arg_size; - - /* Align start of frame for local function. */ - frame->padding1 = ((offset + stack_alignment_needed - 1) - & -stack_alignment_needed) - offset; - - offset += frame->padding1; - - /* Frame pointer points here. */ - frame->frame_pointer_offset = offset; - - offset += size; - - /* Add outgoing arguments area. Can be skipped if we eliminated - all the function calls as dead code. - Skipping is however impossible when function calls alloca. Alloca - expander assumes that last crtl->outgoing_args_size - of stack frame are unused. */ - if (ACCUMULATE_OUTGOING_ARGS - && (!current_function_is_leaf || cfun->calls_alloca - || ix86_current_function_calls_tls_descriptor)) - { - offset += crtl->outgoing_args_size; - frame->outgoing_arguments_size = crtl->outgoing_args_size; - } - else - frame->outgoing_arguments_size = 0; - - /* Align stack boundary. Only needed if we're calling another function - or using alloca. */ - if (!current_function_is_leaf || cfun->calls_alloca - || ix86_current_function_calls_tls_descriptor) - frame->padding2 = ((offset + preferred_alignment - 1) - & -preferred_alignment) - offset; - else - frame->padding2 = 0; - - offset += frame->padding2; - - /* We've reached end of stack frame. */ - frame->stack_pointer_offset = offset; - - /* Size prologue needs to allocate. */ - frame->to_allocate = - (size + frame->padding1 + frame->padding2 - + frame->outgoing_arguments_size + frame->va_arg_size); - - if ((!frame->to_allocate && frame->nregs <= 1) - || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) - frame->save_regs_using_mov = false; - - if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE - && current_function_sp_is_unchanging - && current_function_is_leaf - && !ix86_current_function_calls_tls_descriptor) - { - frame->red_zone_size = frame->to_allocate; - if (frame->save_regs_using_mov) - frame->red_zone_size += frame->nregs * UNITS_PER_WORD; - if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) - frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; - } - else - frame->red_zone_size = 0; - frame->to_allocate -= frame->red_zone_size; - frame->stack_pointer_offset -= frame->red_zone_size; -#if 0 - fprintf (stderr, "\n"); - fprintf (stderr, "size: %ld\n", (long)size); - fprintf (stderr, "nregs: %ld\n", (long)frame->nregs); - fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs); - fprintf (stderr, "padding0: %ld\n", (long)frame->padding0); - fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed); - fprintf (stderr, "padding1: %ld\n", (long)frame->padding1); - fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size); - fprintf (stderr, "padding2: %ld\n", (long)frame->padding2); - fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate); - fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size); - fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset); - fprintf (stderr, "hard_frame_pointer_offset: %ld\n", - (long)frame->hard_frame_pointer_offset); - fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset); - fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf); - fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca); - fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor); -#endif -} - -/* Emit code to save registers in the prologue. */ - -static void -ix86_emit_save_regs (void) -{ - unsigned int regno; - rtx insn; - - for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) - if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) - { - insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); - RTX_FRAME_RELATED_P (insn) = 1; - } -} - -/* Emit code to save registers using MOV insns. First register - is restored from POINTER + OFFSET. */ -static void -ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) -{ - unsigned int regno; - rtx insn; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) - { - insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), - Pmode, offset), - gen_rtx_REG (Pmode, regno)); - RTX_FRAME_RELATED_P (insn) = 1; - offset += UNITS_PER_WORD; - } -} - -/* Emit code to save registers using MOV insns. First register - is restored from POINTER + OFFSET. */ -static void -ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) -{ - unsigned int regno; - rtx insn; - rtx mem; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) - { - mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset); - set_mem_align (mem, 128); - insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno)); - RTX_FRAME_RELATED_P (insn) = 1; - offset += 16; - } -} - -static GTY(()) rtx queued_cfa_restores; - -/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack - manipulation insn. Don't add it if the previously - saved value will be left untouched within stack red-zone till return, - as unwinders can find the same value in the register and - on the stack. */ - -static void -ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset) -{ - if (TARGET_RED_ZONE - && !TARGET_64BIT_MS_ABI - && red_offset + RED_ZONE_SIZE >= 0 - && crtl->args.pops_args < 65536) - return; - - if (insn) - { - add_reg_note (insn, REG_CFA_RESTORE, reg); - RTX_FRAME_RELATED_P (insn) = 1; - } - else - queued_cfa_restores - = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); -} - -/* Add queued REG_CFA_RESTORE notes if any to INSN. */ - -static void -ix86_add_queued_cfa_restore_notes (rtx insn) -{ - rtx last; - if (!queued_cfa_restores) - return; - for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) - ; - XEXP (last, 1) = REG_NOTES (insn); - REG_NOTES (insn) = queued_cfa_restores; - queued_cfa_restores = NULL_RTX; - RTX_FRAME_RELATED_P (insn) = 1; -} - -/* Expand prologue or epilogue stack adjustment. - The pattern exist to put a dependency on all ebp-based memory accesses. - STYLE should be negative if instructions should be marked as frame related, - zero if %r11 register is live and cannot be freely used and positive - otherwise. */ - -static void -pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, - int style, bool set_cfa) -{ - rtx insn; - - if (! TARGET_64BIT) - insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); - else if (x86_64_immediate_operand (offset, DImode)) - insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); - else - { - rtx r11; - /* r11 is used by indirect sibcall return as well, set before the - epilogue and used after the epilogue. ATM indirect sibcall - shouldn't be used together with huge frame sizes in one - function because of the frame_size check in sibcall.c. */ - gcc_assert (style); - r11 = gen_rtx_REG (DImode, R11_REG); - insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); - if (style < 0) - RTX_FRAME_RELATED_P (insn) = 1; - insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, - offset)); - } - - if (style >= 0) - ix86_add_queued_cfa_restore_notes (insn); - - if (set_cfa) - { - rtx r; - - gcc_assert (ix86_cfa_state->reg == src); - ix86_cfa_state->offset += INTVAL (offset); - ix86_cfa_state->reg = dest; - - r = gen_rtx_PLUS (Pmode, src, offset); - r = gen_rtx_SET (VOIDmode, dest, r); - add_reg_note (insn, REG_CFA_ADJUST_CFA, r); - RTX_FRAME_RELATED_P (insn) = 1; - } - else if (style < 0) - RTX_FRAME_RELATED_P (insn) = 1; -} - -/* Find an available register to be used as dynamic realign argument - pointer regsiter. Such a register will be written in prologue and - used in begin of body, so it must not be - 1. parameter passing register. - 2. GOT pointer. - We reuse static-chain register if it is available. Otherwise, we - use DI for i386 and R13 for x86-64. We chose R13 since it has - shorter encoding. - - Return: the regno of chosen register. */ - -static unsigned int -find_drap_reg (void) -{ - tree decl = cfun->decl; - - if (TARGET_64BIT) - { - /* Use R13 for nested function or function need static chain. - Since function with tail call may use any caller-saved - registers in epilogue, DRAP must not use caller-saved - register in such case. */ - if ((decl_function_context (decl) - && !DECL_NO_STATIC_CHAIN (decl)) - || crtl->tail_call_emit) - return R13_REG; - - return R10_REG; - } - else - { - /* Use DI for nested function or function need static chain. - Since function with tail call may use any caller-saved - registers in epilogue, DRAP must not use caller-saved - register in such case. */ - if ((decl_function_context (decl) - && !DECL_NO_STATIC_CHAIN (decl)) - || crtl->tail_call_emit) - return DI_REG; - - /* Reuse static chain register if it isn't used for parameter - passing. */ - if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2 - && !lookup_attribute ("fastcall", - TYPE_ATTRIBUTES (TREE_TYPE (decl)))) - return CX_REG; - else - return DI_REG; - } -} - -/* Return minimum incoming stack alignment. */ - -static unsigned int -ix86_minimum_incoming_stack_boundary (bool sibcall) -{ - unsigned int incoming_stack_boundary; - - /* Prefer the one specified at command line. */ - if (ix86_user_incoming_stack_boundary) - incoming_stack_boundary = ix86_user_incoming_stack_boundary; - /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary - if -mstackrealign is used, it isn't used for sibcall check and - estimated stack alignment is 128bit. */ - else if (!sibcall - && !TARGET_64BIT - && ix86_force_align_arg_pointer - && crtl->stack_alignment_estimated == 128) - incoming_stack_boundary = MIN_STACK_BOUNDARY; - else - incoming_stack_boundary = ix86_default_incoming_stack_boundary; - - /* Incoming stack alignment can be changed on individual functions - via force_align_arg_pointer attribute. We use the smallest - incoming stack boundary. */ - if (incoming_stack_boundary > MIN_STACK_BOUNDARY - && lookup_attribute (ix86_force_align_arg_pointer_string, - TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) - incoming_stack_boundary = MIN_STACK_BOUNDARY; - - /* The incoming stack frame has to be aligned at least at - parm_stack_boundary. */ - if (incoming_stack_boundary < crtl->parm_stack_boundary) - incoming_stack_boundary = crtl->parm_stack_boundary; - - /* Stack at entrance of main is aligned by runtime. We use the - smallest incoming stack boundary. */ - if (incoming_stack_boundary > MAIN_STACK_BOUNDARY - && DECL_NAME (current_function_decl) - && MAIN_NAME_P (DECL_NAME (current_function_decl)) - && DECL_FILE_SCOPE_P (current_function_decl)) - incoming_stack_boundary = MAIN_STACK_BOUNDARY; - - return incoming_stack_boundary; -} - -/* Update incoming stack boundary and estimated stack alignment. */ - -static void -ix86_update_stack_boundary (void) -{ - ix86_incoming_stack_boundary - = ix86_minimum_incoming_stack_boundary (false); - - /* x86_64 vararg needs 16byte stack alignment for register save - area. */ - if (TARGET_64BIT - && cfun->stdarg - && crtl->stack_alignment_estimated < 128) - crtl->stack_alignment_estimated = 128; -} - -/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is - needed or an rtx for DRAP otherwise. */ - -static rtx -ix86_get_drap_rtx (void) -{ - if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS) - crtl->need_drap = true; - - if (stack_realign_drap) - { - /* Assign DRAP to vDRAP and returns vDRAP */ - unsigned int regno = find_drap_reg (); - rtx drap_vreg; - rtx arg_ptr; - rtx seq, insn; - - arg_ptr = gen_rtx_REG (Pmode, regno); - crtl->drap_reg = arg_ptr; - - start_sequence (); - drap_vreg = copy_to_reg (arg_ptr); - seq = get_insns (); - end_sequence (); - - insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); - RTX_FRAME_RELATED_P (insn) = 1; - return drap_vreg; - } - else - return NULL; -} - -/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ - -static rtx -ix86_internal_arg_pointer (void) -{ - return virtual_incoming_args_rtx; -} - -/* Finalize stack_realign_needed flag, which will guide prologue/epilogue - to be generated in correct form. */ -static void -ix86_finalize_stack_realign_flags (void) -{ - /* Check if stack realign is really needed after reload, and - stores result in cfun */ - unsigned int incoming_stack_boundary - = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary - ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); - unsigned int stack_realign = (incoming_stack_boundary - < (current_function_is_leaf - ? crtl->max_used_stack_slot_alignment - : crtl->stack_alignment_needed)); - - if (crtl->stack_realign_finalized) - { - /* After stack_realign_needed is finalized, we can't no longer - change it. */ - gcc_assert (crtl->stack_realign_needed == stack_realign); - } - else - { - crtl->stack_realign_needed = stack_realign; - crtl->stack_realign_finalized = true; - } -} - -/* Expand the prologue into a bunch of separate insns. */ - -void -ix86_expand_prologue (void) -{ - rtx insn; - bool pic_reg_used; - struct ix86_frame frame; - HOST_WIDE_INT allocate; - - ix86_finalize_stack_realign_flags (); - - /* DRAP should not coexist with stack_realign_fp */ - gcc_assert (!(crtl->drap_reg && stack_realign_fp)); - - /* Initialize CFA state for before the prologue. */ - ix86_cfa_state->reg = stack_pointer_rtx; - ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET; - - ix86_compute_frame_layout (&frame); - - /* Emit prologue code to adjust stack alignment and setup DRAP, in case - of DRAP is needed and stack realignment is really needed after reload */ - if (crtl->drap_reg && crtl->stack_realign_needed) - { - rtx x, y; - int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; - int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] - ? 0 : UNITS_PER_WORD); - - gcc_assert (stack_realign_drap); - - /* Grab the argument pointer. */ - x = plus_constant (stack_pointer_rtx, - (UNITS_PER_WORD + param_ptr_offset)); - y = crtl->drap_reg; - - /* Only need to push parameter pointer reg if it is caller - saved reg */ - if (!call_used_regs[REGNO (crtl->drap_reg)]) - { - /* Push arg pointer reg */ - insn = emit_insn (gen_push (y)); - RTX_FRAME_RELATED_P (insn) = 1; - } - - insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); - RTX_FRAME_RELATED_P (insn) = 1; - ix86_cfa_state->reg = crtl->drap_reg; - - /* Align the stack. */ - insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (-align_bytes))); - RTX_FRAME_RELATED_P (insn) = 1; - - /* Replicate the return address on the stack so that return - address can be reached via (argp - 1) slot. This is needed - to implement macro RETURN_ADDR_RTX and intrinsic function - expand_builtin_return_addr etc. */ - x = crtl->drap_reg; - x = gen_frame_mem (Pmode, - plus_constant (x, -UNITS_PER_WORD)); - insn = emit_insn (gen_push (x)); - RTX_FRAME_RELATED_P (insn) = 1; - } - - /* Note: AT&T enter does NOT have reversed args. Enter is probably - slower on all targets. Also sdb doesn't like it. */ - - if (frame_pointer_needed) - { - insn = emit_insn (gen_push (hard_frame_pointer_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - - insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); - RTX_FRAME_RELATED_P (insn) = 1; - - if (ix86_cfa_state->reg == stack_pointer_rtx) - ix86_cfa_state->reg = hard_frame_pointer_rtx; - } - - if (stack_realign_fp) - { - int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; - gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); - - /* Align the stack. */ - insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (-align_bytes))); - RTX_FRAME_RELATED_P (insn) = 1; - } - - allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0; - - if (!frame.save_regs_using_mov) - ix86_emit_save_regs (); - else - allocate += frame.nregs * UNITS_PER_WORD; - - /* When using red zone we may start register saving before allocating - the stack frame saving one cycle of the prologue. However I will - avoid doing this if I am going to have to probe the stack since - at least on x86_64 the stack probe can turn into a call that clobbers - a red zone location */ - if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov - && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)) - ix86_emit_save_regs_using_mov ((frame_pointer_needed - && !crtl->stack_realign_needed) - ? hard_frame_pointer_rtx - : stack_pointer_rtx, - -frame.nregs * UNITS_PER_WORD); - - if (allocate == 0) - ; - else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-allocate), -1, - ix86_cfa_state->reg == stack_pointer_rtx); - else - { - rtx eax = gen_rtx_REG (Pmode, AX_REG); - bool eax_live; - rtx t; - - if (cfun->machine->call_abi == MS_ABI) - eax_live = false; - else - eax_live = ix86_eax_live_at_start_p (); - - if (eax_live) - { - emit_insn (gen_push (eax)); - allocate -= UNITS_PER_WORD; - } - - emit_move_insn (eax, GEN_INT (allocate)); - - if (TARGET_64BIT) - insn = gen_allocate_stack_worker_64 (eax, eax); - else - insn = gen_allocate_stack_worker_32 (eax, eax); - insn = emit_insn (insn); - - if (ix86_cfa_state->reg == stack_pointer_rtx) - { - ix86_cfa_state->offset += allocate; - t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); - t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); - add_reg_note (insn, REG_CFA_ADJUST_CFA, t); - RTX_FRAME_RELATED_P (insn) = 1; - } - - if (eax_live) - { - if (frame_pointer_needed) - t = plus_constant (hard_frame_pointer_rtx, - allocate - - frame.to_allocate - - frame.nregs * UNITS_PER_WORD); - else - t = plus_constant (stack_pointer_rtx, allocate); - emit_move_insn (eax, gen_rtx_MEM (Pmode, t)); - } - } - - if (frame.save_regs_using_mov - && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE - && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))) - { - if (!frame_pointer_needed - || !(frame.to_allocate + frame.padding0) - || crtl->stack_realign_needed) - ix86_emit_save_regs_using_mov (stack_pointer_rtx, - frame.to_allocate - + frame.nsseregs * 16 + frame.padding0); - else - ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, - -frame.nregs * UNITS_PER_WORD); - } - if (!frame_pointer_needed - || !(frame.to_allocate + frame.padding0) - || crtl->stack_realign_needed) - ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx, - frame.to_allocate); - else - ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx, - - frame.nregs * UNITS_PER_WORD - - frame.nsseregs * 16 - - frame.padding0); - - pic_reg_used = false; - if (pic_offset_table_rtx - && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) - || crtl->profile)) - { - unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); - - if (alt_pic_reg_used != INVALID_REGNUM) - SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used); - - pic_reg_used = true; - } - - if (pic_reg_used) - { - if (TARGET_64BIT) - { - if (ix86_cmodel == CM_LARGE_PIC) - { - rtx tmp_reg = gen_rtx_REG (DImode, R11_REG); - rtx label = gen_label_rtx (); - emit_label (label); - LABEL_PRESERVE_P (label) = 1; - gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg)); - insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label)); - insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); - insn = emit_insn (gen_adddi3 (pic_offset_table_rtx, - pic_offset_table_rtx, tmp_reg)); - } - else - insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); - } - else - insn = emit_insn (gen_set_got (pic_offset_table_rtx)); - } - - /* In the pic_reg_used case, make sure that the got load isn't deleted - when mcount needs it. Blockage to avoid call movement across mcount - call is emitted in generic code after the NOTE_INSN_PROLOGUE_END - note. */ - if (crtl->profile && pic_reg_used) - emit_insn (gen_prologue_use (pic_offset_table_rtx)); - - if (crtl->drap_reg && !crtl->stack_realign_needed) - { - /* vDRAP is setup but after reload it turns out stack realign - isn't necessary, here we will emit prologue to setup DRAP - without stack realign adjustment */ - int drap_bp_offset = UNITS_PER_WORD * 2; - rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset); - insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x)); - } - - /* Prevent instructions from being scheduled into register save push - sequence when access to the redzone area is done through frame pointer. - The offset betweeh the frame pointer and the stack pointer is calculated - relative to the value of the stack pointer at the end of the function - prologue, and moving instructions that access redzone area via frame - pointer inside push sequence violates this assumption. */ - if (frame_pointer_needed && frame.red_zone_size) - emit_insn (gen_memory_blockage ()); - - /* Emit cld instruction if stringops are used in the function. */ - if (TARGET_CLD && ix86_current_function_needs_cld) - emit_insn (gen_cld ()); -} - -/* Emit code to restore REG using a POP insn. */ - -static void -ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset) -{ - rtx insn = emit_insn (ix86_gen_pop1 (reg)); - - if (ix86_cfa_state->reg == crtl->drap_reg - && REGNO (reg) == REGNO (crtl->drap_reg)) - { - /* Previously we'd represented the CFA as an expression - like *(%ebp - 8). We've just popped that value from - the stack, which means we need to reset the CFA to - the drap register. This will remain until we restore - the stack pointer. */ - add_reg_note (insn, REG_CFA_DEF_CFA, reg); - RTX_FRAME_RELATED_P (insn) = 1; - return; - } - - if (ix86_cfa_state->reg == stack_pointer_rtx) - { - ix86_cfa_state->offset -= UNITS_PER_WORD; - add_reg_note (insn, REG_CFA_ADJUST_CFA, - copy_rtx (XVECEXP (PATTERN (insn), 0, 1))); - RTX_FRAME_RELATED_P (insn) = 1; - } - - /* When the frame pointer is the CFA, and we pop it, we are - swapping back to the stack pointer as the CFA. This happens - for stack frames that don't allocate other data, so we assume - the stack pointer is now pointing at the return address, i.e. - the function entry state, which makes the offset be 1 word. */ - else if (ix86_cfa_state->reg == hard_frame_pointer_rtx - && reg == hard_frame_pointer_rtx) - { - ix86_cfa_state->reg = stack_pointer_rtx; - ix86_cfa_state->offset = UNITS_PER_WORD; - - add_reg_note (insn, REG_CFA_DEF_CFA, - gen_rtx_PLUS (Pmode, stack_pointer_rtx, - GEN_INT (UNITS_PER_WORD))); - RTX_FRAME_RELATED_P (insn) = 1; - } - - ix86_add_cfa_restore_note (insn, reg, red_offset); -} - -/* Emit code to restore saved registers using POP insns. */ - -static void -ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset) -{ - int regno; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) - { - ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno), - red_offset); - red_offset += UNITS_PER_WORD; - } -} - -/* Emit code and notes for the LEAVE instruction. */ - -static void -ix86_emit_leave (HOST_WIDE_INT red_offset) -{ - rtx insn = emit_insn (ix86_gen_leave ()); - - ix86_add_queued_cfa_restore_notes (insn); - - if (ix86_cfa_state->reg == hard_frame_pointer_rtx) - { - add_reg_note (insn, REG_CFA_ADJUST_CFA, - copy_rtx (XVECEXP (PATTERN (insn), 0, 0))); - RTX_FRAME_RELATED_P (insn) = 1; - ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset); - } -} - -/* Emit code to restore saved registers using MOV insns. First register - is restored from POINTER + OFFSET. */ -static void -ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, - HOST_WIDE_INT red_offset, - int maybe_eh_return) -{ - unsigned int regno; - rtx base_address = gen_rtx_MEM (Pmode, pointer); - rtx insn; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) - { - rtx reg = gen_rtx_REG (Pmode, regno); - - /* Ensure that adjust_address won't be forced to produce pointer - out of range allowed by x86-64 instruction set. */ - if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) - { - rtx r11; - - r11 = gen_rtx_REG (DImode, R11_REG); - emit_move_insn (r11, GEN_INT (offset)); - emit_insn (gen_adddi3 (r11, r11, pointer)); - base_address = gen_rtx_MEM (Pmode, r11); - offset = 0; - } - insn = emit_move_insn (reg, - adjust_address (base_address, Pmode, offset)); - offset += UNITS_PER_WORD; - - if (ix86_cfa_state->reg == crtl->drap_reg - && regno == REGNO (crtl->drap_reg)) - { - /* Previously we'd represented the CFA as an expression - like *(%ebp - 8). We've just popped that value from - the stack, which means we need to reset the CFA to - the drap register. This will remain until we restore - the stack pointer. */ - add_reg_note (insn, REG_CFA_DEF_CFA, reg); - RTX_FRAME_RELATED_P (insn) = 1; - } - else - ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset); - - red_offset += UNITS_PER_WORD; - } -} - -/* Emit code to restore saved registers using MOV insns. First register - is restored from POINTER + OFFSET. */ -static void -ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, - HOST_WIDE_INT red_offset, - int maybe_eh_return) -{ - int regno; - rtx base_address = gen_rtx_MEM (TImode, pointer); - rtx mem, insn; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) - { - rtx reg = gen_rtx_REG (TImode, regno); - - /* Ensure that adjust_address won't be forced to produce pointer - out of range allowed by x86-64 instruction set. */ - if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) - { - rtx r11; - - r11 = gen_rtx_REG (DImode, R11_REG); - emit_move_insn (r11, GEN_INT (offset)); - emit_insn (gen_adddi3 (r11, r11, pointer)); - base_address = gen_rtx_MEM (TImode, r11); - offset = 0; - } - mem = adjust_address (base_address, TImode, offset); - set_mem_align (mem, 128); - insn = emit_move_insn (reg, mem); - offset += 16; - - ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset); - - red_offset += 16; - } -} - -/* Restore function stack, frame, and registers. */ - -void -ix86_expand_epilogue (int style) -{ - int sp_valid; - struct ix86_frame frame; - HOST_WIDE_INT offset, red_offset; - struct machine_cfa_state cfa_state_save = *ix86_cfa_state; - bool using_drap; - - ix86_finalize_stack_realign_flags (); - - /* When stack is realigned, SP must be valid. */ - sp_valid = (!frame_pointer_needed - || current_function_sp_is_unchanging - || stack_realign_fp); - - ix86_compute_frame_layout (&frame); - - /* See the comment about red zone and frame - pointer usage in ix86_expand_prologue. */ - if (frame_pointer_needed && frame.red_zone_size) - emit_insn (gen_memory_blockage ()); - - using_drap = crtl->drap_reg && crtl->stack_realign_needed; - gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg); - - /* Calculate start of saved registers relative to ebp. Special care - must be taken for the normal return case of a function using - eh_return: the eax and edx registers are marked as saved, but not - restored along this path. */ - offset = frame.nregs; - if (crtl->calls_eh_return && style != 2) - offset -= 2; - offset *= -UNITS_PER_WORD; - offset -= frame.nsseregs * 16 + frame.padding0; - - /* Calculate start of saved registers relative to esp on entry of the - function. When realigning stack, this needs to be the most negative - value possible at runtime. */ - red_offset = offset; - if (using_drap) - red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT - + UNITS_PER_WORD; - else if (stack_realign_fp) - red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT - - UNITS_PER_WORD; - if (frame_pointer_needed) - red_offset -= UNITS_PER_WORD; - - /* If we're only restoring one register and sp is not valid then - using a move instruction to restore the register since it's - less work than reloading sp and popping the register. - - The default code result in stack adjustment using add/lea instruction, - while this code results in LEAVE instruction (or discrete equivalent), - so it is profitable in some other cases as well. Especially when there - are no registers to restore. We also use this code when TARGET_USE_LEAVE - and there is exactly one register to pop. This heuristic may need some - tuning in future. */ - if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1) - || (TARGET_EPILOGUE_USING_MOVE - && cfun->machine->use_fast_prologue_epilogue - && ((frame.nregs + frame.nsseregs) > 1 - || (frame.to_allocate + frame.padding0) != 0)) - || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) - && (frame.to_allocate + frame.padding0) != 0) - || (frame_pointer_needed && TARGET_USE_LEAVE - && cfun->machine->use_fast_prologue_epilogue - && (frame.nregs + frame.nsseregs) == 1) - || crtl->calls_eh_return) - { - /* Restore registers. We can use ebp or esp to address the memory - locations. If both are available, default to ebp, since offsets - are known to be small. Only exception is esp pointing directly - to the end of block of saved registers, where we may simplify - addressing mode. - - If we are realigning stack with bp and sp, regs restore can't - be addressed by bp. sp must be used instead. */ - - if (!frame_pointer_needed - || (sp_valid && !(frame.to_allocate + frame.padding0)) - || stack_realign_fp) - { - ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, - frame.to_allocate, red_offset, - style == 2); - ix86_emit_restore_regs_using_mov (stack_pointer_rtx, - frame.to_allocate - + frame.nsseregs * 16 - + frame.padding0, - red_offset - + frame.nsseregs * 16 - + frame.padding0, style == 2); - } - else - { - ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx, - offset, red_offset, - style == 2); - ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, - offset - + frame.nsseregs * 16 - + frame.padding0, - red_offset - + frame.nsseregs * 16 - + frame.padding0, style == 2); - } - - red_offset -= offset; - - /* eh_return epilogues need %ecx added to the stack pointer. */ - if (style == 2) - { - rtx tmp, sa = EH_RETURN_STACKADJ_RTX; - - /* Stack align doesn't work with eh_return. */ - gcc_assert (!crtl->stack_realign_needed); - - if (frame_pointer_needed) - { - tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); - tmp = plus_constant (tmp, UNITS_PER_WORD); - tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); - - tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); - tmp = emit_move_insn (hard_frame_pointer_rtx, tmp); - - /* Note that we use SA as a temporary CFA, as the return - address is at the proper place relative to it. We - pretend this happens at the FP restore insn because - prior to this insn the FP would be stored at the wrong - offset relative to SA, and after this insn we have no - other reasonable register to use for the CFA. We don't - bother resetting the CFA to the SP for the duration of - the return insn. */ - add_reg_note (tmp, REG_CFA_DEF_CFA, - plus_constant (sa, UNITS_PER_WORD)); - ix86_add_queued_cfa_restore_notes (tmp); - add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx); - RTX_FRAME_RELATED_P (tmp) = 1; - ix86_cfa_state->reg = sa; - ix86_cfa_state->offset = UNITS_PER_WORD; - - pro_epilogue_adjust_stack (stack_pointer_rtx, sa, - const0_rtx, style, false); - } - else - { - tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); - tmp = plus_constant (tmp, (frame.to_allocate - + frame.nregs * UNITS_PER_WORD - + frame.nsseregs * 16 - + frame.padding0)); - tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); - ix86_add_queued_cfa_restore_notes (tmp); - - gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx); - if (ix86_cfa_state->offset != UNITS_PER_WORD) - { - ix86_cfa_state->offset = UNITS_PER_WORD; - add_reg_note (tmp, REG_CFA_DEF_CFA, - plus_constant (stack_pointer_rtx, - UNITS_PER_WORD)); - RTX_FRAME_RELATED_P (tmp) = 1; - } - } - } - else if (!frame_pointer_needed) - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (frame.to_allocate - + frame.nregs * UNITS_PER_WORD - + frame.nsseregs * 16 - + frame.padding0), - style, !using_drap); - /* If not an i386, mov & pop is faster than "leave". */ - else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun) - || !cfun->machine->use_fast_prologue_epilogue) - ix86_emit_leave (red_offset); - else - { - pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - const0_rtx, style, !using_drap); - - ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset); - } - } - else - { - /* First step is to deallocate the stack frame so that we can - pop the registers. - - If we realign stack with frame pointer, then stack pointer - won't be able to recover via lea $offset(%bp), %sp, because - there is a padding area between bp and sp for realign. - "add $to_allocate, %sp" must be used instead. */ - if (!sp_valid) - { - gcc_assert (frame_pointer_needed); - gcc_assert (!stack_realign_fp); - pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (offset), style, false); - ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, - 0, red_offset, style == 2); - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (frame.nsseregs * 16 + - frame.padding0), style, false); - } - else if (frame.to_allocate || frame.padding0 || frame.nsseregs) - { - ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, - frame.to_allocate, red_offset, - style == 2); - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (frame.to_allocate - + frame.nsseregs * 16 - + frame.padding0), style, - !using_drap && !frame_pointer_needed); - } - - ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16 - + frame.padding0); - red_offset -= offset; - - if (frame_pointer_needed) - { - /* Leave results in shorter dependency chains on CPUs that are - able to grok it fast. */ - if (TARGET_USE_LEAVE) - ix86_emit_leave (red_offset); - else - { - /* For stack realigned really happens, recover stack - pointer to hard frame pointer is a must, if not using - leave. */ - if (stack_realign_fp) - pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - const0_rtx, style, !using_drap); - ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, - red_offset); - } - } - } - - if (using_drap) - { - int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] - ? 0 : UNITS_PER_WORD); - rtx insn; - - gcc_assert (stack_realign_drap); - - insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx, - crtl->drap_reg, - GEN_INT (-(UNITS_PER_WORD - + param_ptr_offset)))); - - ix86_cfa_state->reg = stack_pointer_rtx; - ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset; - - add_reg_note (insn, REG_CFA_DEF_CFA, - gen_rtx_PLUS (Pmode, ix86_cfa_state->reg, - GEN_INT (ix86_cfa_state->offset))); - RTX_FRAME_RELATED_P (insn) = 1; - - if (param_ptr_offset) - ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD); - } - - /* Sibcall epilogues don't want a return instruction. */ - if (style == 0) - { - *ix86_cfa_state = cfa_state_save; - return; - } - - if (crtl->args.pops_args && crtl->args.size) - { - rtx popc = GEN_INT (crtl->args.pops_args); - - /* i386 can only pop 64K bytes. If asked to pop more, pop return - address, do explicit add, and jump indirectly to the caller. */ - - if (crtl->args.pops_args >= 65536) - { - rtx ecx = gen_rtx_REG (SImode, CX_REG); - rtx insn; - - /* There is no "pascal" calling convention in any 64bit ABI. */ - gcc_assert (!TARGET_64BIT); - - insn = emit_insn (gen_popsi1 (ecx)); - ix86_cfa_state->offset -= UNITS_PER_WORD; - - add_reg_note (insn, REG_CFA_ADJUST_CFA, - copy_rtx (XVECEXP (PATTERN (insn), 0, 1))); - add_reg_note (insn, REG_CFA_REGISTER, - gen_rtx_SET (VOIDmode, ecx, pc_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - - pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, - popc, -1, true); - emit_jump_insn (gen_return_indirect_internal (ecx)); - } - else - emit_jump_insn (gen_return_pop_internal (popc)); - } - else - emit_jump_insn (gen_return_internal ()); - - /* Restore the state back to the state from the prologue, - so that it's correct for the next epilogue. */ - *ix86_cfa_state = cfa_state_save; -} - -/* Reset from the function's potential modifications. */ - -static void -ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, - HOST_WIDE_INT size ATTRIBUTE_UNUSED) -{ - if (pic_offset_table_rtx) - SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); -#if TARGET_MACHO - /* Mach-O doesn't support labels at the end of objects, so if - it looks like we might want one, insert a NOP. */ - { - rtx insn = get_last_insn (); - while (insn - && NOTE_P (insn) - && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) - insn = PREV_INSN (insn); - if (insn - && (LABEL_P (insn) - || (NOTE_P (insn) - && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))) - fputs ("\tnop\n", file); - } -#endif - -} - -/* Extract the parts of an RTL expression that is a valid memory address - for an instruction. Return 0 if the structure of the address is - grossly off. Return -1 if the address contains ASHIFT, so it is not - strictly valid, but still used for computing length of lea instruction. */ - -int -ix86_decompose_address (rtx addr, struct ix86_address *out) -{ - rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; - rtx base_reg, index_reg; - HOST_WIDE_INT scale = 1; - rtx scale_rtx = NULL_RTX; - int retval = 1; - enum ix86_address_seg seg = SEG_DEFAULT; - - if (REG_P (addr) || GET_CODE (addr) == SUBREG) - base = addr; - else if (GET_CODE (addr) == PLUS) - { - rtx addends[4], op; - int n = 0, i; - - op = addr; - do - { - if (n >= 4) - return 0; - addends[n++] = XEXP (op, 1); - op = XEXP (op, 0); - } - while (GET_CODE (op) == PLUS); - if (n >= 4) - return 0; - addends[n] = op; - - for (i = n; i >= 0; --i) - { - op = addends[i]; - switch (GET_CODE (op)) - { - case MULT: - if (index) - return 0; - index = XEXP (op, 0); - scale_rtx = XEXP (op, 1); - break; - - case UNSPEC: - if (XINT (op, 1) == UNSPEC_TP - && TARGET_TLS_DIRECT_SEG_REFS - && seg == SEG_DEFAULT) - seg = TARGET_64BIT ? SEG_FS : SEG_GS; - else - return 0; - break; - - case REG: - case SUBREG: - if (!base) - base = op; - else if (!index) - index = op; - else - return 0; - break; - - case CONST: - case CONST_INT: - case SYMBOL_REF: - case LABEL_REF: - if (disp) - return 0; - disp = op; - break; - - default: - return 0; - } - } - } - else if (GET_CODE (addr) == MULT) - { - index = XEXP (addr, 0); /* index*scale */ - scale_rtx = XEXP (addr, 1); - } - else if (GET_CODE (addr) == ASHIFT) - { - rtx tmp; - - /* We're called for lea too, which implements ashift on occasion. */ - index = XEXP (addr, 0); - tmp = XEXP (addr, 1); - if (!CONST_INT_P (tmp)) - return 0; - scale = INTVAL (tmp); - if ((unsigned HOST_WIDE_INT) scale > 3) - return 0; - scale = 1 << scale; - retval = -1; - } - else - disp = addr; /* displacement */ - - /* Extract the integral value of scale. */ - if (scale_rtx) - { - if (!CONST_INT_P (scale_rtx)) - return 0; - scale = INTVAL (scale_rtx); - } - - base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; - index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; - - /* Allow arg pointer and stack pointer as index if there is not scaling. */ - if (base_reg && index_reg && scale == 1 - && (index_reg == arg_pointer_rtx - || index_reg == frame_pointer_rtx - || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) - { - rtx tmp; - tmp = base, base = index, index = tmp; - tmp = base_reg, base_reg = index_reg, index_reg = tmp; - } - - /* Special case: %ebp cannot be encoded as a base without a displacement. */ - if ((base_reg == hard_frame_pointer_rtx - || base_reg == frame_pointer_rtx - || base_reg == arg_pointer_rtx) && !disp) - disp = const0_rtx; - - /* Special case: on K6, [%esi] makes the instruction vector decoded. - Avoid this by transforming to [%esi+0]. - Reload calls address legitimization without cfun defined, so we need - to test cfun for being non-NULL. */ - if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) - && base_reg && !index_reg && !disp - && REG_P (base_reg) - && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) - disp = const0_rtx; - - /* Special case: encode reg+reg instead of reg*2. */ - if (!base && index && scale && scale == 2) - base = index, base_reg = index_reg, scale = 1; - - /* Special case: scaling cannot be encoded without base or displacement. */ - if (!base && !disp && index && scale != 1) - disp = const0_rtx; - - out->base = base; - out->index = index; - out->disp = disp; - out->scale = scale; - out->seg = seg; - - return retval; -} - -/* Return cost of the memory address x. - For i386, it is better to use a complex address than let gcc copy - the address into a reg and make a new pseudo. But not if the address - requires to two regs - that would mean more pseudos with longer - lifetimes. */ -static int -ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) -{ - struct ix86_address parts; - int cost = 1; - int ok = ix86_decompose_address (x, &parts); - - gcc_assert (ok); - - if (parts.base && GET_CODE (parts.base) == SUBREG) - parts.base = SUBREG_REG (parts.base); - if (parts.index && GET_CODE (parts.index) == SUBREG) - parts.index = SUBREG_REG (parts.index); - - /* Attempt to minimize number of registers in the address. */ - if ((parts.base - && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) - || (parts.index - && (!REG_P (parts.index) - || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) - cost++; - - if (parts.base - && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) - && parts.index - && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) - && parts.base != parts.index) - cost++; - - /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, - since it's predecode logic can't detect the length of instructions - and it degenerates to vector decoded. Increase cost of such - addresses here. The penalty is minimally 2 cycles. It may be worthwhile - to split such addresses or even refuse such addresses at all. - - Following addressing modes are affected: - [base+scale*index] - [scale*index+disp] - [base+index] - - The first and last case may be avoidable by explicitly coding the zero in - memory address, but I don't have AMD-K6 machine handy to check this - theory. */ - - if (TARGET_K6 - && ((!parts.disp && parts.base && parts.index && parts.scale != 1) - || (parts.disp && !parts.base && parts.index && parts.scale != 1) - || (!parts.disp && parts.base && parts.index && parts.scale == 1))) - cost += 10; - - return cost; -} - -/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as - this is used for to form addresses to local data when -fPIC is in - use. */ - -static bool -darwin_local_data_pic (rtx disp) -{ - return (GET_CODE (disp) == UNSPEC - && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); -} - -/* Determine if a given RTX is a valid constant. We already know this - satisfies CONSTANT_P. */ - -bool -legitimate_constant_p (rtx x) -{ - switch (GET_CODE (x)) - { - case CONST: - x = XEXP (x, 0); - - if (GET_CODE (x) == PLUS) - { - if (!CONST_INT_P (XEXP (x, 1))) - return false; - x = XEXP (x, 0); - } - - if (TARGET_MACHO && darwin_local_data_pic (x)) - return true; - - /* Only some unspecs are valid as "constants". */ - if (GET_CODE (x) == UNSPEC) - switch (XINT (x, 1)) - { - case UNSPEC_GOT: - case UNSPEC_GOTOFF: - case UNSPEC_PLTOFF: - return TARGET_64BIT; - case UNSPEC_TPOFF: - case UNSPEC_NTPOFF: - x = XVECEXP (x, 0, 0); - return (GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); - case UNSPEC_DTPOFF: - x = XVECEXP (x, 0, 0); - return (GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); - default: - return false; - } - - /* We must have drilled down to a symbol. */ - if (GET_CODE (x) == LABEL_REF) - return true; - if (GET_CODE (x) != SYMBOL_REF) - return false; - /* FALLTHRU */ - - case SYMBOL_REF: - /* TLS symbols are never valid. */ - if (SYMBOL_REF_TLS_MODEL (x)) - return false; - - /* DLLIMPORT symbols are never valid. */ - if (TARGET_DLLIMPORT_DECL_ATTRIBUTES - && SYMBOL_REF_DLLIMPORT_P (x)) - return false; - break; - - case CONST_DOUBLE: - if (GET_MODE (x) == TImode - && x != CONST0_RTX (TImode) - && !TARGET_64BIT) - return false; - break; - - case CONST_VECTOR: - if (x == CONST0_RTX (GET_MODE (x)) - || (vector_all_ones_operand (x, GET_MODE (x)) && TARGET_SSE)) - return true; - return false; - - default: - break; - } - - /* Otherwise we handle everything else in the move patterns. */ - return true; -} - -/* Determine if it's legal to put X into the constant pool. This - is not possible for the address of thread-local symbols, which - is checked above. */ - -static bool -ix86_cannot_force_const_mem (rtx x) -{ - /* We can always put integral constants and vectors in memory. */ - switch (GET_CODE (x)) - { - case CONST_INT: - case CONST_DOUBLE: - case CONST_VECTOR: - return false; - - default: - break; - } - return !legitimate_constant_p (x); -} - -/* Determine if a given RTX is a valid constant address. */ - -bool -constant_address_p (rtx x) -{ - return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); -} - -/* Nonzero if the constant value X is a legitimate general operand - when generating PIC code. It is given that flag_pic is on and - that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ - -bool -legitimate_pic_operand_p (rtx x) -{ - rtx inner; - - switch (GET_CODE (x)) - { - case CONST: - inner = XEXP (x, 0); - if (GET_CODE (inner) == PLUS - && CONST_INT_P (XEXP (inner, 1))) - inner = XEXP (inner, 0); - - /* Only some unspecs are valid as "constants". */ - if (GET_CODE (inner) == UNSPEC) - switch (XINT (inner, 1)) - { - case UNSPEC_GOT: - case UNSPEC_GOTOFF: - case UNSPEC_PLTOFF: - return TARGET_64BIT; - case UNSPEC_TPOFF: - x = XVECEXP (inner, 0, 0); - return (GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); - case UNSPEC_MACHOPIC_OFFSET: - return legitimate_pic_address_disp_p (x); - default: - return false; - } - /* FALLTHRU */ - - case SYMBOL_REF: - case LABEL_REF: - return legitimate_pic_address_disp_p (x); - - default: - return true; - } -} - -/* Determine if a given CONST RTX is a valid memory displacement - in PIC mode. */ - -int -legitimate_pic_address_disp_p (rtx disp) -{ - bool saw_plus; - - /* In 64bit mode we can allow direct addresses of symbols and labels - when they are not dynamic symbols. */ - if (TARGET_64BIT) - { - rtx op0 = disp, op1; - - switch (GET_CODE (disp)) - { - case LABEL_REF: - return true; - - case CONST: - if (GET_CODE (XEXP (disp, 0)) != PLUS) - break; - op0 = XEXP (XEXP (disp, 0), 0); - op1 = XEXP (XEXP (disp, 0), 1); - if (!CONST_INT_P (op1) - || INTVAL (op1) >= 16*1024*1024 - || INTVAL (op1) < -16*1024*1024) - break; - if (GET_CODE (op0) == LABEL_REF) - return true; - if (GET_CODE (op0) != SYMBOL_REF) - break; - /* FALLTHRU */ - - case SYMBOL_REF: - /* TLS references should always be enclosed in UNSPEC. */ - if (SYMBOL_REF_TLS_MODEL (op0)) - return false; - if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0) - && ix86_cmodel != CM_LARGE_PIC) - return true; - break; - - default: - break; - } - } - if (GET_CODE (disp) != CONST) - return 0; - disp = XEXP (disp, 0); - - if (TARGET_64BIT) - { - /* We are unsafe to allow PLUS expressions. This limit allowed distance - of GOT tables. We should not need these anyway. */ - if (GET_CODE (disp) != UNSPEC - || (XINT (disp, 1) != UNSPEC_GOTPCREL - && XINT (disp, 1) != UNSPEC_GOTOFF - && XINT (disp, 1) != UNSPEC_PLTOFF)) - return 0; - - if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF - && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) - return 0; - return 1; - } - - saw_plus = false; - if (GET_CODE (disp) == PLUS) - { - if (!CONST_INT_P (XEXP (disp, 1))) - return 0; - disp = XEXP (disp, 0); - saw_plus = true; - } - - if (TARGET_MACHO && darwin_local_data_pic (disp)) - return 1; - - if (GET_CODE (disp) != UNSPEC) - return 0; - - switch (XINT (disp, 1)) - { - case UNSPEC_GOT: - if (saw_plus) - return false; - /* We need to check for both symbols and labels because VxWorks loads - text labels with @GOT rather than @GOTOFF. See gotoff_operand for - details. */ - return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF - || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); - case UNSPEC_GOTOFF: - /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. - While ABI specify also 32bit relocation but we don't produce it in - small PIC model at all. */ - if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF - || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) - && !TARGET_64BIT) - return gotoff_operand (XVECEXP (disp, 0, 0), Pmode); - return false; - case UNSPEC_GOTTPOFF: - case UNSPEC_GOTNTPOFF: - case UNSPEC_INDNTPOFF: - if (saw_plus) - return false; - disp = XVECEXP (disp, 0, 0); - return (GET_CODE (disp) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); - case UNSPEC_NTPOFF: - disp = XVECEXP (disp, 0, 0); - return (GET_CODE (disp) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); - case UNSPEC_DTPOFF: - disp = XVECEXP (disp, 0, 0); - return (GET_CODE (disp) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); - } - - return 0; -} - -/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid - memory address for an instruction. The MODE argument is the machine mode - for the MEM expression that wants to use this address. - - It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should - convert common non-canonical forms to canonical form so that they will - be recognized. */ - -int -legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, - rtx addr, int strict) -{ - struct ix86_address parts; - rtx base, index, disp; - HOST_WIDE_INT scale; - const char *reason = NULL; - rtx reason_rtx = NULL_RTX; - - if (ix86_decompose_address (addr, &parts) <= 0) - { - reason = "decomposition failed"; - goto report_error; - } - - base = parts.base; - index = parts.index; - disp = parts.disp; - scale = parts.scale; - - /* Validate base register. - - Don't allow SUBREG's that span more than a word here. It can lead to spill - failures when the base is one word out of a two word structure, which is - represented internally as a DImode int. */ - - if (base) - { - rtx reg; - reason_rtx = base; - - if (REG_P (base)) - reg = base; - else if (GET_CODE (base) == SUBREG - && REG_P (SUBREG_REG (base)) - && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) - <= UNITS_PER_WORD) - reg = SUBREG_REG (base); - else - { - reason = "base is not a register"; - goto report_error; - } - - if (GET_MODE (base) != Pmode) - { - reason = "base is not in Pmode"; - goto report_error; - } - - if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) - || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) - { - reason = "base is not valid"; - goto report_error; - } - } - - /* Validate index register. - - Don't allow SUBREG's that span more than a word here -- same as above. */ - - if (index) - { - rtx reg; - reason_rtx = index; - - if (REG_P (index)) - reg = index; - else if (GET_CODE (index) == SUBREG - && REG_P (SUBREG_REG (index)) - && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) - <= UNITS_PER_WORD) - reg = SUBREG_REG (index); - else - { - reason = "index is not a register"; - goto report_error; - } - - if (GET_MODE (index) != Pmode) - { - reason = "index is not in Pmode"; - goto report_error; - } - - if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) - || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) - { - reason = "index is not valid"; - goto report_error; - } - } - - /* Validate scale factor. */ - if (scale != 1) - { - reason_rtx = GEN_INT (scale); - if (!index) - { - reason = "scale without index"; - goto report_error; - } - - if (scale != 2 && scale != 4 && scale != 8) - { - reason = "scale is not a valid multiplier"; - goto report_error; - } - } - - /* Validate displacement. */ - if (disp) - { - reason_rtx = disp; - - if (GET_CODE (disp) == CONST - && GET_CODE (XEXP (disp, 0)) == UNSPEC - && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) - switch (XINT (XEXP (disp, 0), 1)) - { - /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when - used. While ABI specify also 32bit relocations, we don't produce - them at all and use IP relative instead. */ - case UNSPEC_GOT: - case UNSPEC_GOTOFF: - gcc_assert (flag_pic); - if (!TARGET_64BIT) - goto is_legitimate_pic; - reason = "64bit address unspec"; - goto report_error; - - case UNSPEC_GOTPCREL: - gcc_assert (flag_pic); - goto is_legitimate_pic; - - case UNSPEC_GOTTPOFF: - case UNSPEC_GOTNTPOFF: - case UNSPEC_INDNTPOFF: - case UNSPEC_NTPOFF: - case UNSPEC_DTPOFF: - break; - - default: - reason = "invalid address unspec"; - goto report_error; - } - - else if (SYMBOLIC_CONST (disp) - && (flag_pic - || (TARGET_MACHO -#if TARGET_MACHO - && MACHOPIC_INDIRECT - && !machopic_operand_p (disp) -#endif - ))) - { - - is_legitimate_pic: - if (TARGET_64BIT && (index || base)) - { - /* foo@dtpoff(%rX) is ok. */ - if (GET_CODE (disp) != CONST - || GET_CODE (XEXP (disp, 0)) != PLUS - || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC - || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) - || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF - && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) - { - reason = "non-constant pic memory reference"; - goto report_error; - } - } - else if (! legitimate_pic_address_disp_p (disp)) - { - reason = "displacement is an invalid pic construct"; - goto report_error; - } - - /* This code used to verify that a symbolic pic displacement - includes the pic_offset_table_rtx register. - - While this is good idea, unfortunately these constructs may - be created by "adds using lea" optimization for incorrect - code like: - - int a; - int foo(int i) - { - return *(&a+i); - } - - This code is nonsensical, but results in addressing - GOT table with pic_offset_table_rtx base. We can't - just refuse it easily, since it gets matched by - "addsi3" pattern, that later gets split to lea in the - case output register differs from input. While this - can be handled by separate addsi pattern for this case - that never results in lea, this seems to be easier and - correct fix for crash to disable this test. */ - } - else if (GET_CODE (disp) != LABEL_REF - && !CONST_INT_P (disp) - && (GET_CODE (disp) != CONST - || !legitimate_constant_p (disp)) - && (GET_CODE (disp) != SYMBOL_REF - || !legitimate_constant_p (disp))) - { - reason = "displacement is not constant"; - goto report_error; - } - else if (TARGET_64BIT - && !x86_64_immediate_operand (disp, VOIDmode)) - { - reason = "displacement is out of range"; - goto report_error; - } - } - - /* Everything looks valid. */ - return TRUE; - - report_error: - return FALSE; -} - -/* Return a unique alias set for the GOT. */ - -static alias_set_type -ix86_GOT_alias_set (void) -{ - static alias_set_type set = -1; - if (set == -1) - set = new_alias_set (); - return set; -} - -/* Return a legitimate reference for ORIG (an address) using the - register REG. If REG is 0, a new pseudo is generated. - - There are two types of references that must be handled: - - 1. Global data references must load the address from the GOT, via - the PIC reg. An insn is emitted to do this load, and the reg is - returned. - - 2. Static data references, constant pool addresses, and code labels - compute the address as an offset from the GOT, whose base is in - the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to - differentiate them from global data objects. The returned - address is the PIC reg + an unspec constant. - - GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC - reg also appears in the address. */ - -static rtx -legitimize_pic_address (rtx orig, rtx reg) -{ - rtx addr = orig; - rtx new_rtx = orig; - rtx base; - -#if TARGET_MACHO - if (TARGET_MACHO && !TARGET_64BIT) - { - if (reg == 0) - reg = gen_reg_rtx (Pmode); - /* Use the generic Mach-O PIC machinery. */ - return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); - } -#endif - - if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) - new_rtx = addr; - else if (TARGET_64BIT - && ix86_cmodel != CM_SMALL_PIC - && gotoff_operand (addr, Pmode)) - { - rtx tmpreg; - /* This symbol may be referenced via a displacement from the PIC - base address (@GOTOFF). */ - - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - if (GET_CODE (addr) == CONST) - addr = XEXP (addr, 0); - if (GET_CODE (addr) == PLUS) - { - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), - UNSPEC_GOTOFF); - new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); - } - else - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - if (!reg) - tmpreg = gen_reg_rtx (Pmode); - else - tmpreg = reg; - emit_move_insn (tmpreg, new_rtx); - - if (reg != 0) - { - new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, - tmpreg, 1, OPTAB_DIRECT); - new_rtx = reg; - } - else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); - } - else if (!TARGET_64BIT && gotoff_operand (addr, Pmode)) - { - /* This symbol may be referenced via a displacement from the PIC - base address (@GOTOFF). */ - - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - if (GET_CODE (addr) == CONST) - addr = XEXP (addr, 0); - if (GET_CODE (addr) == PLUS) - { - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), - UNSPEC_GOTOFF); - new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); - } - else - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); - - if (reg != 0) - { - emit_move_insn (reg, new_rtx); - new_rtx = reg; - } - } - else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) - /* We can't use @GOTOFF for text labels on VxWorks; - see gotoff_operand. */ - || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) - { - if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) - { - if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) - return legitimize_dllimport_symbol (addr, true); - if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS - && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF - && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) - { - rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true); - return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); - } - } - - if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) - { - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - new_rtx = gen_const_mem (Pmode, new_rtx); - set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); - - if (reg == 0) - reg = gen_reg_rtx (Pmode); - /* Use directly gen_movsi, otherwise the address is loaded - into register for CSE. We don't want to CSE this addresses, - instead we CSE addresses from the GOT table, so skip this. */ - emit_insn (gen_movsi (reg, new_rtx)); - new_rtx = reg; - } - else - { - /* This symbol must be referenced via a load from the - Global Offset Table (@GOT). */ - - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - if (TARGET_64BIT) - new_rtx = force_reg (Pmode, new_rtx); - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); - new_rtx = gen_const_mem (Pmode, new_rtx); - set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); - - if (reg == 0) - reg = gen_reg_rtx (Pmode); - emit_move_insn (reg, new_rtx); - new_rtx = reg; - } - } - else - { - if (CONST_INT_P (addr) - && !x86_64_immediate_operand (addr, VOIDmode)) - { - if (reg) - { - emit_move_insn (reg, addr); - new_rtx = reg; - } - else - new_rtx = force_reg (Pmode, addr); - } - else if (GET_CODE (addr) == CONST) - { - addr = XEXP (addr, 0); - - /* We must match stuff we generate before. Assume the only - unspecs that can get here are ours. Not that we could do - anything with them anyway.... */ - if (GET_CODE (addr) == UNSPEC - || (GET_CODE (addr) == PLUS - && GET_CODE (XEXP (addr, 0)) == UNSPEC)) - return orig; - gcc_assert (GET_CODE (addr) == PLUS); - } - if (GET_CODE (addr) == PLUS) - { - rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); - - /* Check first to see if this is a constant offset from a @GOTOFF - symbol reference. */ - if (gotoff_operand (op0, Pmode) - && CONST_INT_P (op1)) - { - if (!TARGET_64BIT) - { - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), - UNSPEC_GOTOFF); - new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); - - if (reg != 0) - { - emit_move_insn (reg, new_rtx); - new_rtx = reg; - } - } - else - { - if (INTVAL (op1) < -16*1024*1024 - || INTVAL (op1) >= 16*1024*1024) - { - if (!x86_64_immediate_operand (op1, Pmode)) - op1 = force_reg (Pmode, op1); - new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); - } - } - } - else - { - base = legitimize_pic_address (XEXP (addr, 0), reg); - new_rtx = legitimize_pic_address (XEXP (addr, 1), - base == reg ? NULL_RTX : reg); - - if (CONST_INT_P (new_rtx)) - new_rtx = plus_constant (base, INTVAL (new_rtx)); - else - { - if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1))) - { - base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0)); - new_rtx = XEXP (new_rtx, 1); - } - new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx); - } - } - } - } - return new_rtx; -} - -/* Load the thread pointer. If TO_REG is true, force it into a register. */ - -static rtx -get_thread_pointer (int to_reg) -{ - rtx tp, reg, insn; - - tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - if (!to_reg) - return tp; - - reg = gen_reg_rtx (Pmode); - insn = gen_rtx_SET (VOIDmode, reg, tp); - insn = emit_insn (insn); - - return reg; -} - -/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is - false if we expect this to be used for a memory address and true if - we expect to load the address into a register. */ - -static rtx -legitimize_tls_address (rtx x, enum tls_model model, int for_mov) -{ - rtx dest, base, off, pic, tp; - int type; - - switch (model) - { - case TLS_MODEL_GLOBAL_DYNAMIC: - dest = gen_reg_rtx (Pmode); - tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; - - if (TARGET_64BIT && ! TARGET_GNU2_TLS) - { - rtx rax = gen_rtx_REG (Pmode, AX_REG), insns; - - start_sequence (); - emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); - insns = get_insns (); - end_sequence (); - - RTL_CONST_CALL_P (insns) = 1; - emit_libcall_block (insns, dest, rax, x); - } - else if (TARGET_64BIT && TARGET_GNU2_TLS) - emit_insn (gen_tls_global_dynamic_64 (dest, x)); - else - emit_insn (gen_tls_global_dynamic_32 (dest, x)); - - if (TARGET_GNU2_TLS) - { - dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); - - set_unique_reg_note (get_last_insn (), REG_EQUIV, x); - } - break; - - case TLS_MODEL_LOCAL_DYNAMIC: - base = gen_reg_rtx (Pmode); - tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; - - if (TARGET_64BIT && ! TARGET_GNU2_TLS) - { - rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note; - - start_sequence (); - emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); - insns = get_insns (); - end_sequence (); - - note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); - note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); - RTL_CONST_CALL_P (insns) = 1; - emit_libcall_block (insns, base, rax, note); - } - else if (TARGET_64BIT && TARGET_GNU2_TLS) - emit_insn (gen_tls_local_dynamic_base_64 (base)); - else - emit_insn (gen_tls_local_dynamic_base_32 (base)); - - if (TARGET_GNU2_TLS) - { - rtx x = ix86_tls_module_base (); - - set_unique_reg_note (get_last_insn (), REG_EQUIV, - gen_rtx_MINUS (Pmode, x, tp)); - } - - off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); - off = gen_rtx_CONST (Pmode, off); - - dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); - - if (TARGET_GNU2_TLS) - { - dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); - - set_unique_reg_note (get_last_insn (), REG_EQUIV, x); - } - - break; - - case TLS_MODEL_INITIAL_EXEC: - if (TARGET_64BIT) - { - pic = NULL; - type = UNSPEC_GOTNTPOFF; - } - else if (flag_pic) - { - if (reload_in_progress) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - pic = pic_offset_table_rtx; - type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; - } - else if (!TARGET_ANY_GNU_TLS) - { - pic = gen_reg_rtx (Pmode); - emit_insn (gen_set_got (pic)); - type = UNSPEC_GOTTPOFF; - } - else - { - pic = NULL; - type = UNSPEC_INDNTPOFF; - } - - off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); - off = gen_rtx_CONST (Pmode, off); - if (pic) - off = gen_rtx_PLUS (Pmode, pic, off); - off = gen_const_mem (Pmode, off); - set_mem_alias_set (off, ix86_GOT_alias_set ()); - - if (TARGET_64BIT || TARGET_ANY_GNU_TLS) - { - base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); - off = force_reg (Pmode, off); - return gen_rtx_PLUS (Pmode, base, off); - } - else - { - base = get_thread_pointer (true); - dest = gen_reg_rtx (Pmode); - emit_insn (gen_subsi3 (dest, base, off)); - } - break; - - case TLS_MODEL_LOCAL_EXEC: - off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), - (TARGET_64BIT || TARGET_ANY_GNU_TLS) - ? UNSPEC_NTPOFF : UNSPEC_TPOFF); - off = gen_rtx_CONST (Pmode, off); - - if (TARGET_64BIT || TARGET_ANY_GNU_TLS) - { - base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); - return gen_rtx_PLUS (Pmode, base, off); - } - else - { - base = get_thread_pointer (true); - dest = gen_reg_rtx (Pmode); - emit_insn (gen_subsi3 (dest, base, off)); - } - break; - - default: - gcc_unreachable (); - } - - return dest; -} - -/* Create or return the unique __imp_DECL dllimport symbol corresponding - to symbol DECL. */ - -static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map))) - htab_t dllimport_map; - -static tree -get_dllimport_decl (tree decl) -{ - struct tree_map *h, in; - void **loc; - const char *name; - const char *prefix; - size_t namelen, prefixlen; - char *imp_name; - tree to; - rtx rtl; - - if (!dllimport_map) - dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0); - - in.hash = htab_hash_pointer (decl); - in.base.from = decl; - loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT); - h = (struct tree_map *) *loc; - if (h) - return h->to; - - *loc = h = GGC_NEW (struct tree_map); - h->hash = in.hash; - h->base.from = decl; - h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node); - DECL_ARTIFICIAL (to) = 1; - DECL_IGNORED_P (to) = 1; - DECL_EXTERNAL (to) = 1; - TREE_READONLY (to) = 1; - - name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); - name = targetm.strip_name_encoding (name); - prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 - ? "*__imp_" : "*__imp__"; - namelen = strlen (name); - prefixlen = strlen (prefix); - imp_name = (char *) alloca (namelen + prefixlen + 1); - memcpy (imp_name, prefix, prefixlen); - memcpy (imp_name + prefixlen, name, namelen + 1); - - name = ggc_alloc_string (imp_name, namelen + prefixlen); - rtl = gen_rtx_SYMBOL_REF (Pmode, name); - SET_SYMBOL_REF_DECL (rtl, to); - SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL; - - rtl = gen_const_mem (Pmode, rtl); - set_mem_alias_set (rtl, ix86_GOT_alias_set ()); - - SET_DECL_RTL (to, rtl); - SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); - - return to; -} - -/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is - true if we require the result be a register. */ - -static rtx -legitimize_dllimport_symbol (rtx symbol, bool want_reg) -{ - tree imp_decl; - rtx x; - - gcc_assert (SYMBOL_REF_DECL (symbol)); - imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol)); - - x = DECL_RTL (imp_decl); - if (want_reg) - x = force_reg (Pmode, x); - return x; -} - -/* Try machine-dependent ways of modifying an illegitimate address - to be legitimate. If we find one, return the new, valid address. - This macro is used in only one place: `memory_address' in explow.c. - - OLDX is the address as it was before break_out_memory_refs was called. - In some cases it is useful to look at this to decide what needs to be done. - - MODE and WIN are passed so that this macro can use - GO_IF_LEGITIMATE_ADDRESS. - - It is always safe for this macro to do nothing. It exists to recognize - opportunities to optimize the output. - - For the 80386, we handle X+REG by loading X into a register R and - using R+REG. R will go in a general reg and indexing will be used. - However, if REG is a broken-out memory address or multiplication, - nothing needs to be done because REG can certainly go in a general reg. - - When -fpic is used, special handling is needed for symbolic references. - See comments by legitimize_pic_address in i386.c for details. */ - -rtx -legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) -{ - int changed = 0; - unsigned log; - - log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; - if (log) - return legitimize_tls_address (x, (enum tls_model) log, false); - if (GET_CODE (x) == CONST - && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF - && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) - { - rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), - (enum tls_model) log, false); - return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); - } - - if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) - { - if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x)) - return legitimize_dllimport_symbol (x, true); - if (GET_CODE (x) == CONST - && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF - && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0))) - { - rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true); - return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); - } - } - - if (flag_pic && SYMBOLIC_CONST (x)) - return legitimize_pic_address (x, 0); - - /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ - if (GET_CODE (x) == ASHIFT - && CONST_INT_P (XEXP (x, 1)) - && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) - { - changed = 1; - log = INTVAL (XEXP (x, 1)); - x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), - GEN_INT (1 << log)); - } - - if (GET_CODE (x) == PLUS) - { - /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ - - if (GET_CODE (XEXP (x, 0)) == ASHIFT - && CONST_INT_P (XEXP (XEXP (x, 0), 1)) - && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) - { - changed = 1; - log = INTVAL (XEXP (XEXP (x, 0), 1)); - XEXP (x, 0) = gen_rtx_MULT (Pmode, - force_reg (Pmode, XEXP (XEXP (x, 0), 0)), - GEN_INT (1 << log)); - } - - if (GET_CODE (XEXP (x, 1)) == ASHIFT - && CONST_INT_P (XEXP (XEXP (x, 1), 1)) - && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) - { - changed = 1; - log = INTVAL (XEXP (XEXP (x, 1), 1)); - XEXP (x, 1) = gen_rtx_MULT (Pmode, - force_reg (Pmode, XEXP (XEXP (x, 1), 0)), - GEN_INT (1 << log)); - } - - /* Put multiply first if it isn't already. */ - if (GET_CODE (XEXP (x, 1)) == MULT) - { - rtx tmp = XEXP (x, 0); - XEXP (x, 0) = XEXP (x, 1); - XEXP (x, 1) = tmp; - changed = 1; - } - - /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) - into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be - created by virtual register instantiation, register elimination, and - similar optimizations. */ - if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) - { - changed = 1; - x = gen_rtx_PLUS (Pmode, - gen_rtx_PLUS (Pmode, XEXP (x, 0), - XEXP (XEXP (x, 1), 0)), - XEXP (XEXP (x, 1), 1)); - } - - /* Canonicalize - (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) - into (plus (plus (mult (reg) (const)) (reg)) (const)). */ - else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT - && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS - && CONSTANT_P (XEXP (x, 1))) - { - rtx constant; - rtx other = NULL_RTX; - - if (CONST_INT_P (XEXP (x, 1))) - { - constant = XEXP (x, 1); - other = XEXP (XEXP (XEXP (x, 0), 1), 1); - } - else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) - { - constant = XEXP (XEXP (XEXP (x, 0), 1), 1); - other = XEXP (x, 1); - } - else - constant = 0; - - if (constant) - { - changed = 1; - x = gen_rtx_PLUS (Pmode, - gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), - XEXP (XEXP (XEXP (x, 0), 1), 0)), - plus_constant (other, INTVAL (constant))); - } - } - - if (changed && legitimate_address_p (mode, x, FALSE)) - return x; - - if (GET_CODE (XEXP (x, 0)) == MULT) - { - changed = 1; - XEXP (x, 0) = force_operand (XEXP (x, 0), 0); - } - - if (GET_CODE (XEXP (x, 1)) == MULT) - { - changed = 1; - XEXP (x, 1) = force_operand (XEXP (x, 1), 0); - } - - if (changed - && REG_P (XEXP (x, 1)) - && REG_P (XEXP (x, 0))) - return x; - - if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) - { - changed = 1; - x = legitimize_pic_address (x, 0); - } - - if (changed && legitimate_address_p (mode, x, FALSE)) - return x; - - if (REG_P (XEXP (x, 0))) - { - rtx temp = gen_reg_rtx (Pmode); - rtx val = force_operand (XEXP (x, 1), temp); - if (val != temp) - emit_move_insn (temp, val); - - XEXP (x, 1) = temp; - return x; - } - - else if (REG_P (XEXP (x, 1))) - { - rtx temp = gen_reg_rtx (Pmode); - rtx val = force_operand (XEXP (x, 0), temp); - if (val != temp) - emit_move_insn (temp, val); - - XEXP (x, 0) = temp; - return x; - } - } - - return x; -} - -/* Print an integer constant expression in assembler syntax. Addition - and subtraction are the only arithmetic that may appear in these - expressions. FILE is the stdio stream to write to, X is the rtx, and - CODE is the operand print code from the output string. */ - -static void -output_pic_addr_const (FILE *file, rtx x, int code) -{ - char buf[256]; - - switch (GET_CODE (x)) - { - case PC: - gcc_assert (flag_pic); - putc ('.', file); - break; - - case SYMBOL_REF: - if (! TARGET_MACHO || TARGET_64BIT) - output_addr_const (file, x); - else - { - const char *name = XSTR (x, 0); - - /* Mark the decl as referenced so that cgraph will - output the function. */ - if (SYMBOL_REF_DECL (x)) - mark_decl_referenced (SYMBOL_REF_DECL (x)); - -#if TARGET_MACHO - if (MACHOPIC_INDIRECT - && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) - name = machopic_indirection_name (x, /*stub_p=*/true); -#endif - assemble_name (file, name); - } - if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI) - && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) - fputs ("@PLT", file); - break; - - case LABEL_REF: - x = XEXP (x, 0); - /* FALLTHRU */ - case CODE_LABEL: - ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); - assemble_name (asm_out_file, buf); - break; - - case CONST_INT: - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); - break; - - case CONST: - /* This used to output parentheses around the expression, - but that does not work on the 386 (either ATT or BSD assembler). */ - output_pic_addr_const (file, XEXP (x, 0), code); - break; - - case CONST_DOUBLE: - if (GET_MODE (x) == VOIDmode) - { - /* We can use %d if the number is <32 bits and positive. */ - if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) - fprintf (file, "0x%lx%08lx", - (unsigned long) CONST_DOUBLE_HIGH (x), - (unsigned long) CONST_DOUBLE_LOW (x)); - else - fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); - } - else - /* We can't handle floating point constants; - PRINT_OPERAND must handle them. */ - output_operand_lossage ("floating constant misused"); - break; - - case PLUS: - /* Some assemblers need integer constants to appear first. */ - if (CONST_INT_P (XEXP (x, 0))) - { - output_pic_addr_const (file, XEXP (x, 0), code); - putc ('+', file); - output_pic_addr_const (file, XEXP (x, 1), code); - } - else - { - gcc_assert (CONST_INT_P (XEXP (x, 1))); - output_pic_addr_const (file, XEXP (x, 1), code); - putc ('+', file); - output_pic_addr_const (file, XEXP (x, 0), code); - } - break; - - case MINUS: - if (!TARGET_MACHO) - putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); - output_pic_addr_const (file, XEXP (x, 0), code); - putc ('-', file); - output_pic_addr_const (file, XEXP (x, 1), code); - if (!TARGET_MACHO) - putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); - break; - - case UNSPEC: - gcc_assert (XVECLEN (x, 0) == 1); - output_pic_addr_const (file, XVECEXP (x, 0, 0), code); - switch (XINT (x, 1)) - { - case UNSPEC_GOT: - fputs ("@GOT", file); - break; - case UNSPEC_GOTOFF: - fputs ("@GOTOFF", file); - break; - case UNSPEC_PLTOFF: - fputs ("@PLTOFF", file); - break; - case UNSPEC_GOTPCREL: - fputs (ASSEMBLER_DIALECT == ASM_ATT ? - "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); - break; - case UNSPEC_GOTTPOFF: - /* FIXME: This might be @TPOFF in Sun ld too. */ - fputs ("@GOTTPOFF", file); - break; - case UNSPEC_TPOFF: - fputs ("@TPOFF", file); - break; - case UNSPEC_NTPOFF: - if (TARGET_64BIT) - fputs ("@TPOFF", file); - else - fputs ("@NTPOFF", file); - break; - case UNSPEC_DTPOFF: - fputs ("@DTPOFF", file); - break; - case UNSPEC_GOTNTPOFF: - if (TARGET_64BIT) - fputs (ASSEMBLER_DIALECT == ASM_ATT ? - "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file); - else - fputs ("@GOTNTPOFF", file); - break; - case UNSPEC_INDNTPOFF: - fputs ("@INDNTPOFF", file); - break; -#if TARGET_MACHO - case UNSPEC_MACHOPIC_OFFSET: - putc ('-', file); - machopic_output_function_base_name (file); - break; -#endif - default: - output_operand_lossage ("invalid UNSPEC as operand"); - break; - } - break; - - default: - output_operand_lossage ("invalid expression as operand"); - } -} - -/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. - We need to emit DTP-relative relocations. */ - -static void ATTRIBUTE_UNUSED -i386_output_dwarf_dtprel (FILE *file, int size, rtx x) -{ - fputs (ASM_LONG, file); - output_addr_const (file, x); - fputs ("@DTPOFF", file); - switch (size) - { - case 4: - break; - case 8: - fputs (", 0", file); - break; - default: - gcc_unreachable (); - } -} - -/* Return true if X is a representation of the PIC register. This copes - with calls from ix86_find_base_term, where the register might have - been replaced by a cselib value. */ - -static bool -ix86_pic_register_p (rtx x) -{ - if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) - return (pic_offset_table_rtx - && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); - else - return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM; -} - -/* In the name of slightly smaller debug output, and to cater to - general assembler lossage, recognize PIC+GOTOFF and turn it back - into a direct symbol reference. - - On Darwin, this is necessary to avoid a crash, because Darwin - has a different PIC label for each routine but the DWARF debugging - information is not associated with any particular routine, so it's - necessary to remove references to the PIC label from RTL stored by - the DWARF output code. */ - -static rtx -ix86_delegitimize_address (rtx orig_x) -{ - rtx x = orig_x; - /* reg_addend is NULL or a multiple of some register. */ - rtx reg_addend = NULL_RTX; - /* const_addend is NULL or a const_int. */ - rtx const_addend = NULL_RTX; - /* This is the result, or NULL. */ - rtx result = NULL_RTX; - - if (MEM_P (x)) - x = XEXP (x, 0); - - if (TARGET_64BIT) - { - if (GET_CODE (x) != CONST - || GET_CODE (XEXP (x, 0)) != UNSPEC - || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL - || !MEM_P (orig_x)) - return orig_x; - return XVECEXP (XEXP (x, 0), 0, 0); - } - - if (GET_CODE (x) != PLUS - || GET_CODE (XEXP (x, 1)) != CONST) - return orig_x; - - if (ix86_pic_register_p (XEXP (x, 0))) - /* %ebx + GOT/GOTOFF */ - ; - else if (GET_CODE (XEXP (x, 0)) == PLUS) - { - /* %ebx + %reg * scale + GOT/GOTOFF */ - reg_addend = XEXP (x, 0); - if (ix86_pic_register_p (XEXP (reg_addend, 0))) - reg_addend = XEXP (reg_addend, 1); - else if (ix86_pic_register_p (XEXP (reg_addend, 1))) - reg_addend = XEXP (reg_addend, 0); - else - return orig_x; - if (!REG_P (reg_addend) - && GET_CODE (reg_addend) != MULT - && GET_CODE (reg_addend) != ASHIFT) - return orig_x; - } - else - return orig_x; - - x = XEXP (XEXP (x, 1), 0); - if (GET_CODE (x) == PLUS - && CONST_INT_P (XEXP (x, 1))) - { - const_addend = XEXP (x, 1); - x = XEXP (x, 0); - } - - if (GET_CODE (x) == UNSPEC - && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x)) - || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) - result = XVECEXP (x, 0, 0); - - if (TARGET_MACHO && darwin_local_data_pic (x) - && !MEM_P (orig_x)) - result = XVECEXP (x, 0, 0); - - if (! result) - return orig_x; - - if (const_addend) - result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); - if (reg_addend) - result = gen_rtx_PLUS (Pmode, reg_addend, result); - return result; -} - -/* If X is a machine specific address (i.e. a symbol or label being - referenced as a displacement from the GOT implemented using an - UNSPEC), then return the base term. Otherwise return X. */ - -rtx -ix86_find_base_term (rtx x) -{ - rtx term; - - if (TARGET_64BIT) - { - if (GET_CODE (x) != CONST) - return x; - term = XEXP (x, 0); - if (GET_CODE (term) == PLUS - && (CONST_INT_P (XEXP (term, 1)) - || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) - term = XEXP (term, 0); - if (GET_CODE (term) != UNSPEC - || XINT (term, 1) != UNSPEC_GOTPCREL) - return x; - - return XVECEXP (term, 0, 0); - } - - return ix86_delegitimize_address (x); -} - -static void -put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, - int fp, FILE *file) -{ - const char *suffix; - - if (mode == CCFPmode || mode == CCFPUmode) - { - enum rtx_code second_code, bypass_code; - ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); - gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); - code = ix86_fp_compare_code_to_integer (code); - mode = CCmode; - } - if (reverse) - code = reverse_condition (code); - - switch (code) - { - case EQ: - switch (mode) - { - case CCAmode: - suffix = "a"; - break; - - case CCCmode: - suffix = "c"; - break; - - case CCOmode: - suffix = "o"; - break; - - case CCSmode: - suffix = "s"; - break; - - default: - suffix = "e"; - } - break; - case NE: - switch (mode) - { - case CCAmode: - suffix = "na"; - break; - - case CCCmode: - suffix = "nc"; - break; - - case CCOmode: - suffix = "no"; - break; - - case CCSmode: - suffix = "ns"; - break; - - default: - suffix = "ne"; - } - break; - case GT: - gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); - suffix = "g"; - break; - case GTU: - /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. - Those same assemblers have the same but opposite lossage on cmov. */ - if (mode == CCmode) - suffix = fp ? "nbe" : "a"; - else if (mode == CCCmode) - suffix = "b"; - else - gcc_unreachable (); - break; - case LT: - switch (mode) - { - case CCNOmode: - case CCGOCmode: - suffix = "s"; - break; - - case CCmode: - case CCGCmode: - suffix = "l"; - break; - - default: - gcc_unreachable (); - } - break; - case LTU: - gcc_assert (mode == CCmode || mode == CCCmode); - suffix = "b"; - break; - case GE: - switch (mode) - { - case CCNOmode: - case CCGOCmode: - suffix = "ns"; - break; - - case CCmode: - case CCGCmode: - suffix = "ge"; - break; - - default: - gcc_unreachable (); - } - break; - case GEU: - /* ??? As above. */ - gcc_assert (mode == CCmode || mode == CCCmode); - suffix = fp ? "nb" : "ae"; - break; - case LE: - gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); - suffix = "le"; - break; - case LEU: - /* ??? As above. */ - if (mode == CCmode) - suffix = "be"; - else if (mode == CCCmode) - suffix = fp ? "nb" : "ae"; - else - gcc_unreachable (); - break; - case UNORDERED: - suffix = fp ? "u" : "p"; - break; - case ORDERED: - suffix = fp ? "nu" : "np"; - break; - default: - gcc_unreachable (); - } - fputs (suffix, file); -} - -/* Print the name of register X to FILE based on its machine mode and number. - If CODE is 'w', pretend the mode is HImode. - If CODE is 'b', pretend the mode is QImode. - If CODE is 'k', pretend the mode is SImode. - If CODE is 'q', pretend the mode is DImode. - If CODE is 'x', pretend the mode is V4SFmode. - If CODE is 't', pretend the mode is V8SFmode. - If CODE is 'h', pretend the reg is the 'high' byte register. - If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. - If CODE is 'd', duplicate the operand for AVX instruction. - */ - -void -print_reg (rtx x, int code, FILE *file) -{ - const char *reg; - bool duplicated = code == 'd' && TARGET_AVX; - - gcc_assert (x == pc_rtx - || (REGNO (x) != ARG_POINTER_REGNUM - && REGNO (x) != FRAME_POINTER_REGNUM - && REGNO (x) != FLAGS_REG - && REGNO (x) != FPSR_REG - && REGNO (x) != FPCR_REG)); - - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('%', file); - - if (x == pc_rtx) - { - gcc_assert (TARGET_64BIT); - fputs ("rip", file); - return; - } - - if (code == 'w' || MMX_REG_P (x)) - code = 2; - else if (code == 'b') - code = 1; - else if (code == 'k') - code = 4; - else if (code == 'q') - code = 8; - else if (code == 'y') - code = 3; - else if (code == 'h') - code = 0; - else if (code == 'x') - code = 16; - else if (code == 't') - code = 32; - else - code = GET_MODE_SIZE (GET_MODE (x)); - - /* Irritatingly, AMD extended registers use different naming convention - from the normal registers. */ - if (REX_INT_REG_P (x)) - { - gcc_assert (TARGET_64BIT); - switch (code) - { - case 0: - error ("extended registers have no high halves"); - break; - case 1: - fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); - break; - case 2: - fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); - break; - case 4: - fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); - break; - case 8: - fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); - break; - default: - error ("unsupported operand size for extended register"); - break; - } - return; - } - - reg = NULL; - switch (code) - { - case 3: - if (STACK_TOP_P (x)) - { - reg = "st(0)"; - break; - } - /* FALLTHRU */ - case 8: - case 4: - case 12: - if (! ANY_FP_REG_P (x)) - putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); - /* FALLTHRU */ - case 16: - case 2: - normal: - reg = hi_reg_name[REGNO (x)]; - break; - case 1: - if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) - goto normal; - reg = qi_reg_name[REGNO (x)]; - break; - case 0: - if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) - goto normal; - reg = qi_high_reg_name[REGNO (x)]; - break; - case 32: - if (SSE_REG_P (x)) - { - gcc_assert (!duplicated); - putc ('y', file); - fputs (hi_reg_name[REGNO (x)] + 1, file); - return; - } - break; - default: - gcc_unreachable (); - } - - fputs (reg, file); - if (duplicated) - { - if (ASSEMBLER_DIALECT == ASM_ATT) - fprintf (file, ", %%%s", reg); - else - fprintf (file, ", %s", reg); - } -} - -/* Locate some local-dynamic symbol still in use by this function - so that we can print its name in some tls_local_dynamic_base - pattern. */ - -static int -get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) -{ - rtx x = *px; - - if (GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) - { - cfun->machine->some_ld_name = XSTR (x, 0); - return 1; - } - - return 0; -} - -static const char * -get_some_local_dynamic_name (void) -{ - rtx insn; - - if (cfun->machine->some_ld_name) - return cfun->machine->some_ld_name; - - for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) - if (INSN_P (insn) - && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) - return cfun->machine->some_ld_name; - - gcc_unreachable (); -} - -/* Meaning of CODE: - L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. - C -- print opcode suffix for set/cmov insn. - c -- like C, but print reversed condition - E,e -- likewise, but for compare-and-branch fused insn. - F,f -- likewise, but for floating-point. - O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", - otherwise nothing - R -- print the prefix for register names. - z -- print the opcode suffix for the size of the current operand. - * -- print a star (in certain assembler syntax) - A -- print an absolute memory reference. - w -- print the operand as if it's a "word" (HImode) even if it isn't. - s -- print a shift double count, followed by the assemblers argument - delimiter. - b -- print the QImode name of the register for the indicated operand. - %b0 would print %al if operands[0] is reg 0. - w -- likewise, print the HImode name of the register. - k -- likewise, print the SImode name of the register. - q -- likewise, print the DImode name of the register. - x -- likewise, print the V4SFmode name of the register. - t -- likewise, print the V8SFmode name of the register. - h -- print the QImode name for a "high" register, either ah, bh, ch or dh. - y -- print "st(0)" instead of "st" as a register. - d -- print duplicated register operand for AVX instruction. - D -- print condition for SSE cmp instruction. - P -- if PIC, print an @PLT suffix. - X -- don't print any sort of PIC '@' suffix for a symbol. - & -- print some in-use local-dynamic symbol name. - H -- print a memory address offset by 8; used for sse high-parts - Y -- print condition for SSE5 com* instruction. - + -- print a branch hint as 'cs' or 'ds' prefix - ; -- print a semicolon (after prefixes due to bug in older gas). - */ - -void -print_operand (FILE *file, rtx x, int code) -{ - if (code) - { - switch (code) - { - case '*': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('*', file); - return; - - case '&': - assemble_name (file, get_some_local_dynamic_name ()); - return; - - case 'A': - switch (ASSEMBLER_DIALECT) - { - case ASM_ATT: - putc ('*', file); - break; - - case ASM_INTEL: - /* Intel syntax. For absolute addresses, registers should not - be surrounded by braces. */ - if (!REG_P (x)) - { - putc ('[', file); - PRINT_OPERAND (file, x, 0); - putc (']', file); - return; - } - break; - - default: - gcc_unreachable (); - } - - PRINT_OPERAND (file, x, 0); - return; - - - case 'L': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('l', file); - return; - - case 'W': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('w', file); - return; - - case 'B': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('b', file); - return; - - case 'Q': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('l', file); - return; - - case 'S': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('s', file); - return; - - case 'T': - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('t', file); - return; - - case 'z': - /* 387 opcodes don't get size suffixes if the operands are - registers. */ - if (STACK_REG_P (x)) - return; - - /* Likewise if using Intel opcodes. */ - if (ASSEMBLER_DIALECT == ASM_INTEL) - return; - - /* This is the size of op from size of operand. */ - switch (GET_MODE_SIZE (GET_MODE (x))) - { - case 1: - putc ('b', file); - return; - - case 2: - if (MEM_P (x)) - { -#ifdef HAVE_GAS_FILDS_FISTS - putc ('s', file); -#endif - return; - } - else - putc ('w', file); - return; - - case 4: - if (GET_MODE (x) == SFmode) - { - putc ('s', file); - return; - } - else - putc ('l', file); - return; - - case 12: - case 16: - putc ('t', file); - return; - - case 8: - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - { - if (MEM_P (x)) - { -#ifdef GAS_MNEMONICS - putc ('q', file); -#else - putc ('l', file); - putc ('l', file); -#endif - } - else - putc ('q', file); - } - else - putc ('l', file); - return; - - default: - gcc_unreachable (); - } - - case 'd': - case 'b': - case 'w': - case 'k': - case 'q': - case 'h': - case 't': - case 'y': - case 'x': - case 'X': - case 'P': - break; - - case 's': - if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) - { - PRINT_OPERAND (file, x, 0); - fputs (", ", file); - } - return; - - case 'D': - /* Little bit of braindamage here. The SSE compare instructions - does use completely different names for the comparisons that the - fp conditional moves. */ - if (TARGET_AVX) - { - switch (GET_CODE (x)) - { - case EQ: - fputs ("eq", file); - break; - case UNEQ: - fputs ("eq_us", file); - break; - case LT: - fputs ("lt", file); - break; - case UNLT: - fputs ("nge", file); - break; - case LE: - fputs ("le", file); - break; - case UNLE: - fputs ("ngt", file); - break; - case UNORDERED: - fputs ("unord", file); - break; - case NE: - fputs ("neq", file); - break; - case LTGT: - fputs ("neq_oq", file); - break; - case GE: - fputs ("ge", file); - break; - case UNGE: - fputs ("nlt", file); - break; - case GT: - fputs ("gt", file); - break; - case UNGT: - fputs ("nle", file); - break; - case ORDERED: - fputs ("ord", file); - break; - default: - output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); - return; - } - } - else - { - switch (GET_CODE (x)) - { - case EQ: - case UNEQ: - fputs ("eq", file); - break; - case LT: - case UNLT: - fputs ("lt", file); - break; - case LE: - case UNLE: - fputs ("le", file); - break; - case UNORDERED: - fputs ("unord", file); - break; - case NE: - case LTGT: - fputs ("neq", file); - break; - case UNGE: - case GE: - fputs ("nlt", file); - break; - case UNGT: - case GT: - fputs ("nle", file); - break; - case ORDERED: - fputs ("ord", file); - break; - default: - output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); - return; - } - } - return; - case 'O': -#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX - if (ASSEMBLER_DIALECT == ASM_ATT) - { - switch (GET_MODE (x)) - { - case HImode: putc ('w', file); break; - case SImode: - case SFmode: putc ('l', file); break; - case DImode: - case DFmode: putc ('q', file); break; - default: gcc_unreachable (); - } - putc ('.', file); - } -#endif - return; - case 'C': - if (!COMPARISON_P (x)) - { - output_operand_lossage ("operand is neither a constant nor a " - "condition code, invalid operand code " - "'C'"); - return; - } - put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); - return; - case 'F': - if (!COMPARISON_P (x)) - { - output_operand_lossage ("operand is neither a constant nor a " - "condition code, invalid operand code " - "'F'"); - return; - } -#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('.', file); -#endif - put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); - return; - - /* Like above, but reverse condition */ - case 'c': - /* Check to see if argument to %c is really a constant - and not a condition code which needs to be reversed. */ - if (!COMPARISON_P (x)) - { - output_operand_lossage ("operand is neither a constant nor a " - "condition code, invalid operand " - "code 'c'"); - return; - } - put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); - return; - case 'f': - if (!COMPARISON_P (x)) - { - output_operand_lossage ("operand is neither a constant nor a " - "condition code, invalid operand " - "code 'f'"); - return; - } -#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('.', file); -#endif - put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); - return; - - case 'E': - put_condition_code (GET_CODE (x), CCmode, 0, 0, file); - return; - - case 'e': - put_condition_code (GET_CODE (x), CCmode, 1, 0, file); - return; - - case 'H': - /* It doesn't actually matter what mode we use here, as we're - only going to use this for printing. */ - x = adjust_address_nv (x, DImode, 8); - break; - - case '+': - { - rtx x; - - if (!optimize - || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS) - return; - - x = find_reg_note (current_output_insn, REG_BR_PROB, 0); - if (x) - { - int pred_val = INTVAL (XEXP (x, 0)); - - if (pred_val < REG_BR_PROB_BASE * 45 / 100 - || pred_val > REG_BR_PROB_BASE * 55 / 100) - { - int taken = pred_val > REG_BR_PROB_BASE / 2; - int cputaken = final_forward_branch_p (current_output_insn) == 0; - - /* Emit hints only in the case default branch prediction - heuristics would fail. */ - if (taken != cputaken) - { - /* We use 3e (DS) prefix for taken branches and - 2e (CS) prefix for not taken branches. */ - if (taken) - fputs ("ds ; ", file); - else - fputs ("cs ; ", file); - } - } - } - return; - } - - case 'Y': - switch (GET_CODE (x)) - { - case NE: - fputs ("neq", file); - break; - case EQ: - fputs ("eq", file); - break; - case GE: - case GEU: - fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); - break; - case GT: - case GTU: - fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); - break; - case LE: - case LEU: - fputs ("le", file); - break; - case LT: - case LTU: - fputs ("lt", file); - break; - case UNORDERED: - fputs ("unord", file); - break; - case ORDERED: - fputs ("ord", file); - break; - case UNEQ: - fputs ("ueq", file); - break; - case UNGE: - fputs ("nlt", file); - break; - case UNGT: - fputs ("nle", file); - break; - case UNLE: - fputs ("ule", file); - break; - case UNLT: - fputs ("ult", file); - break; - case LTGT: - fputs ("une", file); - break; - default: - output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); - return; - } - return; - - case ';': -#if TARGET_MACHO - fputs (" ; ", file); -#else - fputc (' ', file); -#endif - return; - - default: - output_operand_lossage ("invalid operand code '%c'", code); - } - } - - if (REG_P (x)) - print_reg (x, code, file); - - else if (MEM_P (x)) - { - /* No `byte ptr' prefix for call instructions or BLKmode operands. */ - if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P' - && GET_MODE (x) != BLKmode) - { - const char * size; - switch (GET_MODE_SIZE (GET_MODE (x))) - { - case 1: size = "BYTE"; break; - case 2: size = "WORD"; break; - case 4: size = "DWORD"; break; - case 8: size = "QWORD"; break; - case 12: size = "TBYTE"; break; - case 16: - if (GET_MODE (x) == XFmode) - size = "TBYTE"; - else - size = "XMMWORD"; - break; - case 32: size = "YMMWORD"; break; - default: - gcc_unreachable (); - } - - /* Check for explicit size override (codes 'b', 'w' and 'k') */ - if (code == 'b') - size = "BYTE"; - else if (code == 'w') - size = "WORD"; - else if (code == 'k') - size = "DWORD"; - - fputs (size, file); - fputs (" PTR ", file); - } - - x = XEXP (x, 0); - /* Avoid (%rip) for call operands. */ - if (CONSTANT_ADDRESS_P (x) && code == 'P' - && !CONST_INT_P (x)) - output_addr_const (file, x); - else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) - output_operand_lossage ("invalid constraints for operand"); - else - output_address (x); - } - - else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) - { - REAL_VALUE_TYPE r; - long l; - - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - REAL_VALUE_TO_TARGET_SINGLE (r, l); - - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('$', file); - fprintf (file, "0x%08lx", (long unsigned int) l); - } - - /* These float cases don't actually occur as immediate operands. */ - else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) - { - char dstr[30]; - - real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); - fprintf (file, "%s", dstr); - } - - else if (GET_CODE (x) == CONST_DOUBLE - && GET_MODE (x) == XFmode) - { - char dstr[30]; - - real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); - fprintf (file, "%s", dstr); - } - - else - { - /* We have patterns that allow zero sets of memory, for instance. - In 64-bit mode, we should probably support all 8-byte vectors, - since we can in fact encode that into an immediate. */ - if (GET_CODE (x) == CONST_VECTOR) - { - gcc_assert (x == CONST0_RTX (GET_MODE (x))); - x = const0_rtx; - } - - if (code != 'P') - { - if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE) - { - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('$', file); - } - else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF - || GET_CODE (x) == LABEL_REF) - { - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('$', file); - else - fputs ("OFFSET FLAT:", file); - } - } - if (CONST_INT_P (x)) - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); - else if (flag_pic) - output_pic_addr_const (file, x, code); - else - output_addr_const (file, x); - } -} - -/* Print a memory operand whose address is ADDR. */ - -void -print_operand_address (FILE *file, rtx addr) -{ - struct ix86_address parts; - rtx base, index, disp; - int scale; - int ok = ix86_decompose_address (addr, &parts); - - gcc_assert (ok); - - base = parts.base; - index = parts.index; - disp = parts.disp; - scale = parts.scale; - - switch (parts.seg) - { - case SEG_DEFAULT: - break; - case SEG_FS: - case SEG_GS: - if (ASSEMBLER_DIALECT == ASM_ATT) - putc ('%', file); - fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); - break; - default: - gcc_unreachable (); - } - - /* Use one byte shorter RIP relative addressing for 64bit mode. */ - if (TARGET_64BIT && !base && !index) - { - rtx symbol = disp; - - if (GET_CODE (disp) == CONST - && GET_CODE (XEXP (disp, 0)) == PLUS - && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) - symbol = XEXP (XEXP (disp, 0), 0); - - if (GET_CODE (symbol) == LABEL_REF - || (GET_CODE (symbol) == SYMBOL_REF - && SYMBOL_REF_TLS_MODEL (symbol) == 0)) - base = pc_rtx; - } - if (!base && !index) - { - /* Displacement only requires special attention. */ - - if (CONST_INT_P (disp)) - { - if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) - fputs ("ds:", file); - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); - } - else if (flag_pic) - output_pic_addr_const (file, disp, 0); - else - output_addr_const (file, disp); - } - else - { - if (ASSEMBLER_DIALECT == ASM_ATT) - { - if (disp) - { - if (flag_pic) - output_pic_addr_const (file, disp, 0); - else if (GET_CODE (disp) == LABEL_REF) - output_asm_label (disp); - else - output_addr_const (file, disp); - } - - putc ('(', file); - if (base) - print_reg (base, 0, file); - if (index) - { - putc (',', file); - print_reg (index, 0, file); - if (scale != 1) - fprintf (file, ",%d", scale); - } - putc (')', file); - } - else - { - rtx offset = NULL_RTX; - - if (disp) - { - /* Pull out the offset of a symbol; print any symbol itself. */ - if (GET_CODE (disp) == CONST - && GET_CODE (XEXP (disp, 0)) == PLUS - && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) - { - offset = XEXP (XEXP (disp, 0), 1); - disp = gen_rtx_CONST (VOIDmode, - XEXP (XEXP (disp, 0), 0)); - } - - if (flag_pic) - output_pic_addr_const (file, disp, 0); - else if (GET_CODE (disp) == LABEL_REF) - output_asm_label (disp); - else if (CONST_INT_P (disp)) - offset = disp; - else - output_addr_const (file, disp); - } - - putc ('[', file); - if (base) - { - print_reg (base, 0, file); - if (offset) - { - if (INTVAL (offset) >= 0) - putc ('+', file); - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); - } - } - else if (offset) - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); - else - putc ('0', file); - - if (index) - { - putc ('+', file); - print_reg (index, 0, file); - if (scale != 1) - fprintf (file, "*%d", scale); - } - putc (']', file); - } - } -} - -bool -output_addr_const_extra (FILE *file, rtx x) -{ - rtx op; - - if (GET_CODE (x) != UNSPEC) - return false; - - op = XVECEXP (x, 0, 0); - switch (XINT (x, 1)) - { - case UNSPEC_GOTTPOFF: - output_addr_const (file, op); - /* FIXME: This might be @TPOFF in Sun ld. */ - fputs ("@GOTTPOFF", file); - break; - case UNSPEC_TPOFF: - output_addr_const (file, op); - fputs ("@TPOFF", file); - break; - case UNSPEC_NTPOFF: - output_addr_const (file, op); - if (TARGET_64BIT) - fputs ("@TPOFF", file); - else - fputs ("@NTPOFF", file); - break; - case UNSPEC_DTPOFF: - output_addr_const (file, op); - fputs ("@DTPOFF", file); - break; - case UNSPEC_GOTNTPOFF: - output_addr_const (file, op); - if (TARGET_64BIT) - fputs (ASSEMBLER_DIALECT == ASM_ATT ? - "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file); - else - fputs ("@GOTNTPOFF", file); - break; - case UNSPEC_INDNTPOFF: - output_addr_const (file, op); - fputs ("@INDNTPOFF", file); - break; -#if TARGET_MACHO - case UNSPEC_MACHOPIC_OFFSET: - output_addr_const (file, op); - putc ('-', file); - machopic_output_function_base_name (file); - break; -#endif - - default: - return false; - } - - return true; -} - -/* Split one or more DImode RTL references into pairs of SImode - references. The RTL can be REG, offsettable MEM, integer constant, or - CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to - split and "num" is its length. lo_half and hi_half are output arrays - that parallel "operands". */ - -void -split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) -{ - while (num--) - { - rtx op = operands[num]; - - /* simplify_subreg refuse to split volatile memory addresses, - but we still have to handle it. */ - if (MEM_P (op)) - { - lo_half[num] = adjust_address (op, SImode, 0); - hi_half[num] = adjust_address (op, SImode, 4); - } - else - { - lo_half[num] = simplify_gen_subreg (SImode, op, - GET_MODE (op) == VOIDmode - ? DImode : GET_MODE (op), 0); - hi_half[num] = simplify_gen_subreg (SImode, op, - GET_MODE (op) == VOIDmode - ? DImode : GET_MODE (op), 4); - } - } -} -/* Split one or more TImode RTL references into pairs of DImode - references. The RTL can be REG, offsettable MEM, integer constant, or - CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to - split and "num" is its length. lo_half and hi_half are output arrays - that parallel "operands". */ - -void -split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) -{ - while (num--) - { - rtx op = operands[num]; - - /* simplify_subreg refuse to split volatile memory addresses, but we - still have to handle it. */ - if (MEM_P (op)) - { - lo_half[num] = adjust_address (op, DImode, 0); - hi_half[num] = adjust_address (op, DImode, 8); - } - else - { - lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); - hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); - } - } -} - -/* Output code to perform a 387 binary operation in INSN, one of PLUS, - MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] - is the expression of the binary operation. The output may either be - emitted here, or returned to the caller, like all output_* functions. - - There is no guarantee that the operands are the same mode, as they - might be within FLOAT or FLOAT_EXTEND expressions. */ - -#ifndef SYSV386_COMPAT -/* Set to 1 for compatibility with brain-damaged assemblers. No-one - wants to fix the assemblers because that causes incompatibility - with gcc. No-one wants to fix gcc because that causes - incompatibility with assemblers... You can use the option of - -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ -#define SYSV386_COMPAT 1 -#endif - -const char * -output_387_binary_op (rtx insn, rtx *operands) -{ - static char buf[40]; - const char *p; - const char *ssep; - int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); - -#ifdef ENABLE_CHECKING - /* Even if we do not want to check the inputs, this documents input - constraints. Which helps in understanding the following code. */ - if (STACK_REG_P (operands[0]) - && ((REG_P (operands[1]) - && REGNO (operands[0]) == REGNO (operands[1]) - && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) - || (REG_P (operands[2]) - && REGNO (operands[0]) == REGNO (operands[2]) - && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) - && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) - ; /* ok */ - else - gcc_assert (is_sse); -#endif - - switch (GET_CODE (operands[3])) - { - case PLUS: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT - || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) - p = "fiadd"; - else - p = "fadd"; - ssep = "vadd"; - break; - - case MINUS: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT - || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) - p = "fisub"; - else - p = "fsub"; - ssep = "vsub"; - break; - - case MULT: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT - || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) - p = "fimul"; - else - p = "fmul"; - ssep = "vmul"; - break; - - case DIV: - if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT - || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) - p = "fidiv"; - else - p = "fdiv"; - ssep = "vdiv"; - break; - - default: - gcc_unreachable (); - } - - if (is_sse) - { - if (TARGET_AVX) - { - strcpy (buf, ssep); - if (GET_MODE (operands[0]) == SFmode) - strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}"); - else - strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}"); - } - else - { - strcpy (buf, ssep + 1); - if (GET_MODE (operands[0]) == SFmode) - strcat (buf, "ss\t{%2, %0|%0, %2}"); - else - strcat (buf, "sd\t{%2, %0|%0, %2}"); - } - return buf; - } - strcpy (buf, p); - - switch (GET_CODE (operands[3])) - { - case MULT: - case PLUS: - if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) - { - rtx temp = operands[2]; - operands[2] = operands[1]; - operands[1] = temp; - } - - /* know operands[0] == operands[1]. */ - - if (MEM_P (operands[2])) - { - p = "%z2\t%2"; - break; - } - - if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) - { - if (STACK_TOP_P (operands[0])) - /* How is it that we are storing to a dead operand[2]? - Well, presumably operands[1] is dead too. We can't - store the result to st(0) as st(0) gets popped on this - instruction. Instead store to operands[2] (which I - think has to be st(1)). st(1) will be popped later. - gcc <= 2.8.1 didn't have this check and generated - assembly code that the Unixware assembler rejected. */ - p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ - else - p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ - break; - } - - if (STACK_TOP_P (operands[0])) - p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ - else - p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ - break; - - case MINUS: - case DIV: - if (MEM_P (operands[1])) - { - p = "r%z1\t%1"; - break; - } - - if (MEM_P (operands[2])) - { - p = "%z2\t%2"; - break; - } - - if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) - { -#if SYSV386_COMPAT - /* The SystemV/386 SVR3.2 assembler, and probably all AT&T - derived assemblers, confusingly reverse the direction of - the operation for fsub{r} and fdiv{r} when the - destination register is not st(0). The Intel assembler - doesn't have this brain damage. Read !SYSV386_COMPAT to - figure out what the hardware really does. */ - if (STACK_TOP_P (operands[0])) - p = "{p\t%0, %2|rp\t%2, %0}"; - else - p = "{rp\t%2, %0|p\t%0, %2}"; -#else - if (STACK_TOP_P (operands[0])) - /* As above for fmul/fadd, we can't store to st(0). */ - p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ - else - p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ -#endif - break; - } - - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - { -#if SYSV386_COMPAT - if (STACK_TOP_P (operands[0])) - p = "{rp\t%0, %1|p\t%1, %0}"; - else - p = "{p\t%1, %0|rp\t%0, %1}"; -#else - if (STACK_TOP_P (operands[0])) - p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ - else - p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ -#endif - break; - } - - if (STACK_TOP_P (operands[0])) - { - if (STACK_TOP_P (operands[1])) - p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ - else - p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ - break; - } - else if (STACK_TOP_P (operands[1])) - { -#if SYSV386_COMPAT - p = "{\t%1, %0|r\t%0, %1}"; -#else - p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ -#endif - } - else - { -#if SYSV386_COMPAT - p = "{r\t%2, %0|\t%0, %2}"; -#else - p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ -#endif - } - break; - - default: - gcc_unreachable (); - } - - strcat (buf, p); - return buf; -} - -/* Return needed mode for entity in optimize_mode_switching pass. */ - -int -ix86_mode_needed (int entity, rtx insn) -{ - enum attr_i387_cw mode; - - /* The mode UNINITIALIZED is used to store control word after a - function call or ASM pattern. The mode ANY specify that function - has no requirements on the control word and make no changes in the - bits we are interested in. */ - - if (CALL_P (insn) - || (NONJUMP_INSN_P (insn) - && (asm_noperands (PATTERN (insn)) >= 0 - || GET_CODE (PATTERN (insn)) == ASM_INPUT))) - return I387_CW_UNINITIALIZED; - - if (recog_memoized (insn) < 0) - return I387_CW_ANY; - - mode = get_attr_i387_cw (insn); - - switch (entity) - { - case I387_TRUNC: - if (mode == I387_CW_TRUNC) - return mode; - break; - - case I387_FLOOR: - if (mode == I387_CW_FLOOR) - return mode; - break; - - case I387_CEIL: - if (mode == I387_CW_CEIL) - return mode; - break; - - case I387_MASK_PM: - if (mode == I387_CW_MASK_PM) - return mode; - break; - - default: - gcc_unreachable (); - } - - return I387_CW_ANY; -} - -/* Output code to initialize control word copies used by trunc?f?i and - rounding patterns. CURRENT_MODE is set to current control word, - while NEW_MODE is set to new control word. */ - -void -emit_i387_cw_initialization (int mode) -{ - rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); - rtx new_mode; - - enum ix86_stack_slot slot; - - rtx reg = gen_reg_rtx (HImode); - - emit_insn (gen_x86_fnstcw_1 (stored_mode)); - emit_move_insn (reg, copy_rtx (stored_mode)); - - if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL - || optimize_function_for_size_p (cfun)) - { - switch (mode) - { - case I387_CW_TRUNC: - /* round toward zero (truncate) */ - emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); - slot = SLOT_CW_TRUNC; - break; - - case I387_CW_FLOOR: - /* round down toward -oo */ - emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); - emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); - slot = SLOT_CW_FLOOR; - break; - - case I387_CW_CEIL: - /* round up toward +oo */ - emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); - emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); - slot = SLOT_CW_CEIL; - break; - - case I387_CW_MASK_PM: - /* mask precision exception for nearbyint() */ - emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); - slot = SLOT_CW_MASK_PM; - break; - - default: - gcc_unreachable (); - } - } - else - { - switch (mode) - { - case I387_CW_TRUNC: - /* round toward zero (truncate) */ - emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); - slot = SLOT_CW_TRUNC; - break; - - case I387_CW_FLOOR: - /* round down toward -oo */ - emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); - slot = SLOT_CW_FLOOR; - break; - - case I387_CW_CEIL: - /* round up toward +oo */ - emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); - slot = SLOT_CW_CEIL; - break; - - case I387_CW_MASK_PM: - /* mask precision exception for nearbyint() */ - emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); - slot = SLOT_CW_MASK_PM; - break; - - default: - gcc_unreachable (); - } - } - - gcc_assert (slot < MAX_386_STACK_LOCALS); - - new_mode = assign_386_stack_local (HImode, slot); - emit_move_insn (new_mode, reg); -} - -/* Output code for INSN to convert a float to a signed int. OPERANDS - are the insn operands. The output may be [HSD]Imode and the input - operand may be [SDX]Fmode. */ - -const char * -output_fix_trunc (rtx insn, rtx *operands, int fisttp) -{ - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - int dimode_p = GET_MODE (operands[0]) == DImode; - int round_mode = get_attr_i387_cw (insn); - - /* Jump through a hoop or two for DImode, since the hardware has no - non-popping instruction. We used to do this a different way, but - that was somewhat fragile and broke with post-reload splitters. */ - if ((dimode_p || fisttp) && !stack_top_dies) - output_asm_insn ("fld\t%y1", operands); - - gcc_assert (STACK_TOP_P (operands[1])); - gcc_assert (MEM_P (operands[0])); - gcc_assert (GET_MODE (operands[1]) != TFmode); - - if (fisttp) - output_asm_insn ("fisttp%z0\t%0", operands); - else - { - if (round_mode != I387_CW_ANY) - output_asm_insn ("fldcw\t%3", operands); - if (stack_top_dies || dimode_p) - output_asm_insn ("fistp%z0\t%0", operands); - else - output_asm_insn ("fist%z0\t%0", operands); - if (round_mode != I387_CW_ANY) - output_asm_insn ("fldcw\t%2", operands); - } - - return ""; -} - -/* Output code for x87 ffreep insn. The OPNO argument, which may only - have the values zero or one, indicates the ffreep insn's operand - from the OPERANDS array. */ - -static const char * -output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) -{ - if (TARGET_USE_FFREEP) -#ifdef HAVE_AS_IX86_FFREEP - return opno ? "ffreep\t%y1" : "ffreep\t%y0"; -#else - { - static char retval[32]; - int regno = REGNO (operands[opno]); - - gcc_assert (FP_REGNO_P (regno)); - - regno -= FIRST_STACK_REG; - - snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); - return retval; - } -#endif - - return opno ? "fstp\t%y1" : "fstp\t%y0"; -} - - -/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi - should be used. UNORDERED_P is true when fucom should be used. */ - -const char * -output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) -{ - int stack_top_dies; - rtx cmp_op0, cmp_op1; - int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); - - if (eflags_p) - { - cmp_op0 = operands[0]; - cmp_op1 = operands[1]; - } - else - { - cmp_op0 = operands[1]; - cmp_op1 = operands[2]; - } - - if (is_sse) - { - static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}"; - static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}"; - static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}"; - static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}"; - - if (GET_MODE (operands[0]) == SFmode) - if (unordered_p) - return &ucomiss[TARGET_AVX ? 0 : 1]; - else - return &comiss[TARGET_AVX ? 0 : 1]; - else - if (unordered_p) - return &ucomisd[TARGET_AVX ? 0 : 1]; - else - return &comisd[TARGET_AVX ? 0 : 1]; - } - - gcc_assert (STACK_TOP_P (cmp_op0)); - - stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - - if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) - { - if (stack_top_dies) - { - output_asm_insn ("ftst\n\tfnstsw\t%0", operands); - return output_387_ffreep (operands, 1); - } - else - return "ftst\n\tfnstsw\t%0"; - } - - if (STACK_REG_P (cmp_op1) - && stack_top_dies - && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) - && REGNO (cmp_op1) != FIRST_STACK_REG) - { - /* If both the top of the 387 stack dies, and the other operand - is also a stack register that dies, then this must be a - `fcompp' float compare */ - - if (eflags_p) - { - /* There is no double popping fcomi variant. Fortunately, - eflags is immune from the fstp's cc clobbering. */ - if (unordered_p) - output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); - else - output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); - return output_387_ffreep (operands, 0); - } - else - { - if (unordered_p) - return "fucompp\n\tfnstsw\t%0"; - else - return "fcompp\n\tfnstsw\t%0"; - } - } - else - { - /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ - - static const char * const alt[16] = - { - "fcom%z2\t%y2\n\tfnstsw\t%0", - "fcomp%z2\t%y2\n\tfnstsw\t%0", - "fucom%z2\t%y2\n\tfnstsw\t%0", - "fucomp%z2\t%y2\n\tfnstsw\t%0", - - "ficom%z2\t%y2\n\tfnstsw\t%0", - "ficomp%z2\t%y2\n\tfnstsw\t%0", - NULL, - NULL, - - "fcomi\t{%y1, %0|%0, %y1}", - "fcomip\t{%y1, %0|%0, %y1}", - "fucomi\t{%y1, %0|%0, %y1}", - "fucomip\t{%y1, %0|%0, %y1}", - - NULL, - NULL, - NULL, - NULL - }; - - int mask; - const char *ret; - - mask = eflags_p << 3; - mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; - mask |= unordered_p << 1; - mask |= stack_top_dies; - - gcc_assert (mask < 16); - ret = alt[mask]; - gcc_assert (ret); - - return ret; - } -} - -void -ix86_output_addr_vec_elt (FILE *file, int value) -{ - const char *directive = ASM_LONG; - -#ifdef ASM_QUAD - if (TARGET_64BIT) - directive = ASM_QUAD; -#else - gcc_assert (!TARGET_64BIT); -#endif - - fprintf (file, "%s%s%d\n", directive, LPREFIX, value); -} - -void -ix86_output_addr_diff_elt (FILE *file, int value, int rel) -{ - const char *directive = ASM_LONG; - -#ifdef ASM_QUAD - if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) - directive = ASM_QUAD; -#else - gcc_assert (!TARGET_64BIT); -#endif - /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ - if (TARGET_64BIT || TARGET_VXWORKS_RTP) - fprintf (file, "%s%s%d-%s%d\n", - directive, LPREFIX, value, LPREFIX, rel); - else if (HAVE_AS_GOTOFF_IN_DATA) - fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); -#if TARGET_MACHO - else if (TARGET_MACHO) - { - fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); - machopic_output_function_base_name (file); - fprintf(file, "\n"); - } -#endif - else - asm_fprintf (file, "%s%U%s+[.-%s%d]\n", - ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); -} - -/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate - for the target. */ - -void -ix86_expand_clear (rtx dest) -{ - rtx tmp; - - /* We play register width games, which are only valid after reload. */ - gcc_assert (reload_completed); - - /* Avoid HImode and its attendant prefix byte. */ - if (GET_MODE_SIZE (GET_MODE (dest)) < 4) - dest = gen_rtx_REG (SImode, REGNO (dest)); - tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); - - /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ - if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())) - { - rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); - } - - emit_insn (tmp); -} - -/* X is an unchanging MEM. If it is a constant pool reference, return - the constant pool rtx, else NULL. */ - -rtx -maybe_get_pool_constant (rtx x) -{ - x = ix86_delegitimize_address (XEXP (x, 0)); - - if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) - return get_pool_constant (x); - - return NULL_RTX; -} - -void -ix86_expand_move (enum machine_mode mode, rtx operands[]) -{ - rtx op0, op1; - enum tls_model model; - - op0 = operands[0]; - op1 = operands[1]; - - if (GET_CODE (op1) == SYMBOL_REF) - { - model = SYMBOL_REF_TLS_MODEL (op1); - if (model) - { - op1 = legitimize_tls_address (op1, model, true); - op1 = force_operand (op1, op0); - if (op1 == op0) - return; - } - else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES - && SYMBOL_REF_DLLIMPORT_P (op1)) - op1 = legitimize_dllimport_symbol (op1, false); - } - else if (GET_CODE (op1) == CONST - && GET_CODE (XEXP (op1, 0)) == PLUS - && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) - { - rtx addend = XEXP (XEXP (op1, 0), 1); - rtx symbol = XEXP (XEXP (op1, 0), 0); - rtx tmp = NULL; - - model = SYMBOL_REF_TLS_MODEL (symbol); - if (model) - tmp = legitimize_tls_address (symbol, model, true); - else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES - && SYMBOL_REF_DLLIMPORT_P (symbol)) - tmp = legitimize_dllimport_symbol (symbol, true); - - if (tmp) - { - tmp = force_operand (tmp, NULL); - tmp = expand_simple_binop (Pmode, PLUS, tmp, addend, - op0, 1, OPTAB_DIRECT); - if (tmp == op0) - return; - } - } - - if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) - { - if (TARGET_MACHO && !TARGET_64BIT) - { -#if TARGET_MACHO - if (MACHOPIC_PURE) - { - rtx temp = ((reload_in_progress - || ((op0 && REG_P (op0)) - && mode == Pmode)) - ? op0 : gen_reg_rtx (Pmode)); - op1 = machopic_indirect_data_reference (op1, temp); - op1 = machopic_legitimize_pic_address (op1, mode, - temp == op1 ? 0 : temp); - } - else if (MACHOPIC_INDIRECT) - op1 = machopic_indirect_data_reference (op1, 0); - if (op0 == op1) - return; -#endif - } - else - { - if (MEM_P (op0)) - op1 = force_reg (Pmode, op1); - else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode)) - { - rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX; - op1 = legitimize_pic_address (op1, reg); - if (op0 == op1) - return; - } - } - } - else - { - if (MEM_P (op0) - && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) - || !push_operand (op0, mode)) - && MEM_P (op1)) - op1 = force_reg (mode, op1); - - if (push_operand (op0, mode) - && ! general_no_elim_operand (op1, mode)) - op1 = copy_to_mode_reg (mode, op1); - - /* Force large constants in 64bit compilation into register - to get them CSEed. */ - if (can_create_pseudo_p () - && (mode == DImode) && TARGET_64BIT - && immediate_operand (op1, mode) - && !x86_64_zext_immediate_operand (op1, VOIDmode) - && !register_operand (op0, mode) - && optimize) - op1 = copy_to_mode_reg (mode, op1); - - if (can_create_pseudo_p () - && FLOAT_MODE_P (mode) - && GET_CODE (op1) == CONST_DOUBLE) - { - /* If we are loading a floating point constant to a register, - force the value to memory now, since we'll get better code - out the back end. */ - - op1 = validize_mem (force_const_mem (mode, op1)); - if (!register_operand (op0, mode)) - { - rtx temp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); - emit_move_insn (op0, temp); - return; - } - } - } - - emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); -} - -void -ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) -{ - rtx op0 = operands[0], op1 = operands[1]; - unsigned int align = GET_MODE_ALIGNMENT (mode); - - /* Force constants other than zero into memory. We do not know how - the instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if (can_create_pseudo_p () - && register_operand (op0, mode) - && (CONSTANT_P (op1) - || (GET_CODE (op1) == SUBREG - && CONSTANT_P (SUBREG_REG (op1)))) - && !standard_sse_constant_p (op1)) - op1 = validize_mem (force_const_mem (mode, op1)); - - /* We need to check memory alignment for SSE mode since attribute - can make operands unaligned. */ - if (can_create_pseudo_p () - && SSE_REG_MODE_P (mode) - && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) - || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) - { - rtx tmp[2]; - - /* ix86_expand_vector_move_misalign() does not like constants ... */ - if (CONSTANT_P (op1) - || (GET_CODE (op1) == SUBREG - && CONSTANT_P (SUBREG_REG (op1)))) - op1 = validize_mem (force_const_mem (mode, op1)); - - /* ... nor both arguments in memory. */ - if (!register_operand (op0, mode) - && !register_operand (op1, mode)) - op1 = force_reg (mode, op1); - - tmp[0] = op0; tmp[1] = op1; - ix86_expand_vector_move_misalign (mode, tmp); - return; - } - - /* Make operand1 a register if it isn't already. */ - if (can_create_pseudo_p () - && !register_operand (op0, mode) - && !register_operand (op1, mode)) - { - emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); - return; - } - - emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); -} - -/* Implement the movmisalign patterns for SSE. Non-SSE modes go - straight to ix86_expand_vector_move. */ -/* Code generation for scalar reg-reg moves of single and double precision data: - if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) - movaps reg, reg - else - movss reg, reg - if (x86_sse_partial_reg_dependency == true) - movapd reg, reg - else - movsd reg, reg - - Code generation for scalar loads of double precision data: - if (x86_sse_split_regs == true) - movlpd mem, reg (gas syntax) - else - movsd mem, reg - - Code generation for unaligned packed loads of single precision data - (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): - if (x86_sse_unaligned_move_optimal) - movups mem, reg - - if (x86_sse_partial_reg_dependency == true) - { - xorps reg, reg - movlps mem, reg - movhps mem+8, reg - } - else - { - movlps mem, reg - movhps mem+8, reg - } - - Code generation for unaligned packed loads of double precision data - (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): - if (x86_sse_unaligned_move_optimal) - movupd mem, reg - - if (x86_sse_split_regs == true) - { - movlpd mem, reg - movhpd mem+8, reg - } - else - { - movsd mem, reg - movhpd mem+8, reg - } - */ - -void -ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) -{ - rtx op0, op1, m; - - op0 = operands[0]; - op1 = operands[1]; - - if (TARGET_AVX) - { - switch (GET_MODE_CLASS (mode)) - { - case MODE_VECTOR_INT: - case MODE_INT: - switch (GET_MODE_SIZE (mode)) - { - case 16: - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_avx_movdqu (op0, op1)); - break; - case 32: - op0 = gen_lowpart (V32QImode, op0); - op1 = gen_lowpart (V32QImode, op1); - emit_insn (gen_avx_movdqu256 (op0, op1)); - break; - default: - gcc_unreachable (); - } - break; - case MODE_VECTOR_FLOAT: - op0 = gen_lowpart (mode, op0); - op1 = gen_lowpart (mode, op1); - - switch (mode) - { - case V4SFmode: - emit_insn (gen_avx_movups (op0, op1)); - break; - case V8SFmode: - emit_insn (gen_avx_movups256 (op0, op1)); - break; - case V2DFmode: - emit_insn (gen_avx_movupd (op0, op1)); - break; - case V4DFmode: - emit_insn (gen_avx_movupd256 (op0, op1)); - break; - default: - gcc_unreachable (); - } - break; - - default: - gcc_unreachable (); - } - - return; - } - - if (MEM_P (op1)) - { - /* If we're optimizing for size, movups is the smallest. */ - if (optimize_insn_for_size_p ()) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - - /* ??? If we have typed data, then it would appear that using - movdqu is the only way to get unaligned data loaded with - integer type. */ - if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - { - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); - return; - } - - if (TARGET_SSE2 && mode == V2DFmode) - { - rtx zero; - - if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) - { - op0 = gen_lowpart (V2DFmode, op0); - op1 = gen_lowpart (V2DFmode, op1); - emit_insn (gen_sse2_movupd (op0, op1)); - return; - } - - /* When SSE registers are split into halves, we can avoid - writing to the top half twice. */ - if (TARGET_SSE_SPLIT_REGS) - { - emit_clobber (op0); - zero = op0; - } - else - { - /* ??? Not sure about the best option for the Intel chips. - The following would seem to satisfy; the register is - entirely cleared, breaking the dependency chain. We - then store to the upper half, with a dependency depth - of one. A rumor has it that Intel recommends two movsd - followed by an unpacklpd, but this is unconfirmed. And - given that the dependency depth of the unpacklpd would - still be one, I'm not sure why this would be better. */ - zero = CONST0_RTX (V2DFmode); - } - - m = adjust_address (op1, DFmode, 0); - emit_insn (gen_sse2_loadlpd (op0, zero, m)); - m = adjust_address (op1, DFmode, 8); - emit_insn (gen_sse2_loadhpd (op0, op0, m)); - } - else - { - if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - - if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) - emit_move_insn (op0, CONST0_RTX (mode)); - else - emit_clobber (op0); - - if (mode != V4SFmode) - op0 = gen_lowpart (V4SFmode, op0); - m = adjust_address (op1, V2SFmode, 0); - emit_insn (gen_sse_loadlps (op0, op0, m)); - m = adjust_address (op1, V2SFmode, 8); - emit_insn (gen_sse_loadhps (op0, op0, m)); - } - } - else if (MEM_P (op0)) - { - /* If we're optimizing for size, movups is the smallest. */ - if (optimize_insn_for_size_p ()) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - - /* ??? Similar to above, only less clear because of quote - typeless stores unquote. */ - if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES - && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - { - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); - return; - } - - if (TARGET_SSE2 && mode == V2DFmode) - { - m = adjust_address (op0, DFmode, 0); - emit_insn (gen_sse2_storelpd (m, op1)); - m = adjust_address (op0, DFmode, 8); - emit_insn (gen_sse2_storehpd (m, op1)); - } - else - { - if (mode != V4SFmode) - op1 = gen_lowpart (V4SFmode, op1); - m = adjust_address (op0, V2SFmode, 0); - emit_insn (gen_sse_storelps (m, op1)); - m = adjust_address (op0, V2SFmode, 8); - emit_insn (gen_sse_storehps (m, op1)); - } - } - else - gcc_unreachable (); -} - -/* Expand a push in MODE. This is some mode for which we do not support - proper push instructions, at least from the registers that we expect - the value to live in. */ - -void -ix86_expand_push (enum machine_mode mode, rtx x) -{ - rtx tmp; - - tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, - GEN_INT (-GET_MODE_SIZE (mode)), - stack_pointer_rtx, 1, OPTAB_DIRECT); - if (tmp != stack_pointer_rtx) - emit_move_insn (stack_pointer_rtx, tmp); - - tmp = gen_rtx_MEM (mode, stack_pointer_rtx); - - /* When we push an operand onto stack, it has to be aligned at least - at the function argument boundary. However since we don't have - the argument type, we can't determine the actual argument - boundary. */ - emit_move_insn (tmp, x); -} - -/* Helper function of ix86_fixup_binary_operands to canonicalize - operand order. Returns true if the operands should be swapped. */ - -static bool -ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode, - rtx operands[]) -{ - rtx dst = operands[0]; - rtx src1 = operands[1]; - rtx src2 = operands[2]; - - /* If the operation is not commutative, we can't do anything. */ - if (GET_RTX_CLASS (code) != RTX_COMM_ARITH) - return false; - - /* Highest priority is that src1 should match dst. */ - if (rtx_equal_p (dst, src1)) - return false; - if (rtx_equal_p (dst, src2)) - return true; - - /* Next highest priority is that immediate constants come second. */ - if (immediate_operand (src2, mode)) - return false; - if (immediate_operand (src1, mode)) - return true; - - /* Lowest priority is that memory references should come second. */ - if (MEM_P (src2)) - return false; - if (MEM_P (src1)) - return true; - - return false; -} - - -/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the - destination to use for the operation. If different from the true - destination in operands[0], a copy operation will be required. */ - -rtx -ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, - rtx operands[]) -{ - rtx dst = operands[0]; - rtx src1 = operands[1]; - rtx src2 = operands[2]; - - /* Canonicalize operand order. */ - if (ix86_swap_binary_operands_p (code, mode, operands)) - { - rtx temp; - - /* It is invalid to swap operands of different modes. */ - gcc_assert (GET_MODE (src1) == GET_MODE (src2)); - - temp = src1; - src1 = src2; - src2 = temp; - } - - /* Both source operands cannot be in memory. */ - if (MEM_P (src1) && MEM_P (src2)) - { - /* Optimization: Only read from memory once. */ - if (rtx_equal_p (src1, src2)) - { - src2 = force_reg (mode, src2); - src1 = src2; - } - else - src2 = force_reg (mode, src2); - } - - /* If the destination is memory, and we do not have matching source - operands, do things in registers. */ - if (MEM_P (dst) && !rtx_equal_p (dst, src1)) - dst = gen_reg_rtx (mode); - - /* Source 1 cannot be a constant. */ - if (CONSTANT_P (src1)) - src1 = force_reg (mode, src1); - - /* Source 1 cannot be a non-matching memory. */ - if (MEM_P (src1) && !rtx_equal_p (dst, src1)) - src1 = force_reg (mode, src1); - - operands[1] = src1; - operands[2] = src2; - return dst; -} - -/* Similarly, but assume that the destination has already been - set up properly. */ - -void -ix86_fixup_binary_operands_no_copy (enum rtx_code code, - enum machine_mode mode, rtx operands[]) -{ - rtx dst = ix86_fixup_binary_operands (code, mode, operands); - gcc_assert (dst == operands[0]); -} - -/* Attempt to expand a binary operator. Make the expansion closer to the - actual machine, then just general_operand, which will allow 3 separate - memory references (one output, two input) in a single insn. */ - -void -ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, - rtx operands[]) -{ - rtx src1, src2, dst, op, clob; - - dst = ix86_fixup_binary_operands (code, mode, operands); - src1 = operands[1]; - src2 = operands[2]; - - /* Emit the instruction. */ - - op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); - if (reload_in_progress) - { - /* Reload doesn't know about the flags register, and doesn't know that - it doesn't want to clobber it. We can only do this with PLUS. */ - gcc_assert (code == PLUS); - emit_insn (op); - } - else - { - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); - } - - /* Fix up the destination if needed. */ - if (dst != operands[0]) - emit_move_insn (operands[0], dst); -} - -/* Return TRUE or FALSE depending on whether the binary operator meets the - appropriate constraints. */ - -int -ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode, - rtx operands[3]) -{ - rtx dst = operands[0]; - rtx src1 = operands[1]; - rtx src2 = operands[2]; - - /* Both source operands cannot be in memory. */ - if (MEM_P (src1) && MEM_P (src2)) - return 0; - - /* Canonicalize operand order for commutative operators. */ - if (ix86_swap_binary_operands_p (code, mode, operands)) - { - rtx temp = src1; - src1 = src2; - src2 = temp; - } - - /* If the destination is memory, we must have a matching source operand. */ - if (MEM_P (dst) && !rtx_equal_p (dst, src1)) - return 0; - - /* Source 1 cannot be a constant. */ - if (CONSTANT_P (src1)) - return 0; - - /* Source 1 cannot be a non-matching memory. */ - if (MEM_P (src1) && !rtx_equal_p (dst, src1)) - return 0; - - return 1; -} - -/* Attempt to expand a unary operator. Make the expansion closer to the - actual machine, then just general_operand, which will allow 2 separate - memory references (one output, one input) in a single insn. */ - -void -ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, - rtx operands[]) -{ - int matching_memory; - rtx src, dst, op, clob; - - dst = operands[0]; - src = operands[1]; - - /* If the destination is memory, and we do not have matching source - operands, do things in registers. */ - matching_memory = 0; - if (MEM_P (dst)) - { - if (rtx_equal_p (dst, src)) - matching_memory = 1; - else - dst = gen_reg_rtx (mode); - } - - /* When source operand is memory, destination must match. */ - if (MEM_P (src) && !matching_memory) - src = force_reg (mode, src); - - /* Emit the instruction. */ - - op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); - if (reload_in_progress || code == NOT) - { - /* Reload doesn't know about the flags register, and doesn't know that - it doesn't want to clobber it. */ - gcc_assert (code == NOT); - emit_insn (op); - } - else - { - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); - } - - /* Fix up the destination if needed. */ - if (dst != operands[0]) - emit_move_insn (operands[0], dst); -} - -#define LEA_SEARCH_THRESHOLD 12 - -/* Search backward for non-agu definition of register number REGNO1 - or register number REGNO2 in INSN's basic block until - 1. Pass LEA_SEARCH_THRESHOLD instructions, or - 2. Reach BB boundary, or - 3. Reach agu definition. - Returns the distance between the non-agu definition point and INSN. - If no definition point, returns -1. */ - -static int -distance_non_agu_define (unsigned int regno1, unsigned int regno2, - rtx insn) -{ - basic_block bb = BLOCK_FOR_INSN (insn); - int distance = 0; - df_ref *def_rec; - enum attr_type insn_type; - - if (insn != BB_HEAD (bb)) - { - rtx prev = PREV_INSN (insn); - while (prev && distance < LEA_SEARCH_THRESHOLD) - { - if (INSN_P (prev)) - { - distance++; - for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { - insn_type = get_attr_type (prev); - if (insn_type != TYPE_LEA) - goto done; - } - } - if (prev == BB_HEAD (bb)) - break; - prev = PREV_INSN (prev); - } - } - - if (distance < LEA_SEARCH_THRESHOLD) - { - edge e; - edge_iterator ei; - bool simple_loop = false; - - FOR_EACH_EDGE (e, ei, bb->preds) - if (e->src == bb) - { - simple_loop = true; - break; - } - - if (simple_loop) - { - rtx prev = BB_END (bb); - while (prev - && prev != insn - && distance < LEA_SEARCH_THRESHOLD) - { - if (INSN_P (prev)) - { - distance++; - for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { - insn_type = get_attr_type (prev); - if (insn_type != TYPE_LEA) - goto done; - } - } - prev = PREV_INSN (prev); - } - } - } - - distance = -1; - -done: - /* get_attr_type may modify recog data. We want to make sure - that recog data is valid for instruction INSN, on which - distance_non_agu_define is called. INSN is unchanged here. */ - extract_insn_cached (insn); - return distance; -} - -/* Return the distance between INSN and the next insn that uses - register number REGNO0 in memory address. Return -1 if no such - a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ - -static int -distance_agu_use (unsigned int regno0, rtx insn) -{ - basic_block bb = BLOCK_FOR_INSN (insn); - int distance = 0; - df_ref *def_rec; - df_ref *use_rec; - - if (insn != BB_END (bb)) - { - rtx next = NEXT_INSN (insn); - while (next && distance < LEA_SEARCH_THRESHOLD) - { - if (INSN_P (next)) - { - distance++; - - for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) - if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD - || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) - && regno0 == DF_REF_REGNO (*use_rec)) - { - /* Return DISTANCE if OP0 is used in memory - address in NEXT. */ - return distance; - } - - for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && regno0 == DF_REF_REGNO (*def_rec)) - { - /* Return -1 if OP0 is set in NEXT. */ - return -1; - } - } - if (next == BB_END (bb)) - break; - next = NEXT_INSN (next); - } - } - - if (distance < LEA_SEARCH_THRESHOLD) - { - edge e; - edge_iterator ei; - bool simple_loop = false; - - FOR_EACH_EDGE (e, ei, bb->succs) - if (e->dest == bb) - { - simple_loop = true; - break; - } - - if (simple_loop) - { - rtx next = BB_HEAD (bb); - while (next - && next != insn - && distance < LEA_SEARCH_THRESHOLD) - { - if (INSN_P (next)) - { - distance++; - - for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) - if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD - || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) - && regno0 == DF_REF_REGNO (*use_rec)) - { - /* Return DISTANCE if OP0 is used in memory - address in NEXT. */ - return distance; - } - - for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && regno0 == DF_REF_REGNO (*def_rec)) - { - /* Return -1 if OP0 is set in NEXT. */ - return -1; - } - - } - next = NEXT_INSN (next); - } - } - } - - return -1; -} - -/* Define this macro to tune LEA priority vs ADD, it take effect when - there is a dilemma of choicing LEA or ADD - Negative value: ADD is more preferred than LEA - Zero: Netrual - Positive value: LEA is more preferred than ADD*/ -#define IX86_LEA_PRIORITY 2 - -/* Return true if it is ok to optimize an ADD operation to LEA - operation to avoid flag register consumation. For the processors - like ATOM, if the destination register of LEA holds an actual - address which will be used soon, LEA is better and otherwise ADD - is better. */ - -bool -ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED, - rtx insn, rtx operands[]) -{ - unsigned int regno0 = true_regnum (operands[0]); - unsigned int regno1 = true_regnum (operands[1]); - unsigned int regno2; - - if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) - return regno0 != regno1; - - regno2 = true_regnum (operands[2]); - - /* If a = b + c, (a!=b && a!=c), must use lea form. */ - if (regno0 != regno1 && regno0 != regno2) - return true; - else - { - int dist_define, dist_use; - dist_define = distance_non_agu_define (regno1, regno2, insn); - if (dist_define <= 0) - return true; - - /* If this insn has both backward non-agu dependence and forward - agu dependence, the one with short distance take effect. */ - dist_use = distance_agu_use (regno0, insn); - if (dist_use <= 0 - || (dist_define + IX86_LEA_PRIORITY) < dist_use) - return false; - - return true; - } -} - -/* Return true if destination reg of SET_BODY is shift count of - USE_BODY. */ - -static bool -ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) -{ - rtx set_dest; - rtx shift_rtx; - int i; - - /* Retrieve destination of SET_BODY. */ - switch (GET_CODE (set_body)) - { - case SET: - set_dest = SET_DEST (set_body); - if (!set_dest || !REG_P (set_dest)) - return false; - break; - case PARALLEL: - for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) - if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), - use_body)) - return true; - default: - return false; - break; - } - - /* Retrieve shift count of USE_BODY. */ - switch (GET_CODE (use_body)) - { - case SET: - shift_rtx = XEXP (use_body, 1); - break; - case PARALLEL: - for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) - if (ix86_dep_by_shift_count_body (set_body, - XVECEXP (use_body, 0, i))) - return true; - default: - return false; - break; - } - - if (shift_rtx - && (GET_CODE (shift_rtx) == ASHIFT - || GET_CODE (shift_rtx) == LSHIFTRT - || GET_CODE (shift_rtx) == ASHIFTRT - || GET_CODE (shift_rtx) == ROTATE - || GET_CODE (shift_rtx) == ROTATERT)) - { - rtx shift_count = XEXP (shift_rtx, 1); - - /* Return true if shift count is dest of SET_BODY. */ - if (REG_P (shift_count) - && true_regnum (set_dest) == true_regnum (shift_count)) - return true; - } - - return false; -} - -/* Return true if destination reg of SET_INSN is shift count of - USE_INSN. */ - -bool -ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) -{ - return ix86_dep_by_shift_count_body (PATTERN (set_insn), - PATTERN (use_insn)); -} - -/* Return TRUE or FALSE depending on whether the unary operator meets the - appropriate constraints. */ - -int -ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - rtx operands[2] ATTRIBUTE_UNUSED) -{ - /* If one of operands is memory, source and destination must match. */ - if ((MEM_P (operands[0]) - || MEM_P (operands[1])) - && ! rtx_equal_p (operands[0], operands[1])) - return FALSE; - return TRUE; -} - -/* Post-reload splitter for converting an SF or DFmode value in an - SSE register into an unsigned SImode. */ - -void -ix86_split_convert_uns_si_sse (rtx operands[]) -{ - enum machine_mode vecmode; - rtx value, large, zero_or_two31, input, two31, x; - - large = operands[1]; - zero_or_two31 = operands[2]; - input = operands[3]; - two31 = operands[4]; - vecmode = GET_MODE (large); - value = gen_rtx_REG (vecmode, REGNO (operands[0])); - - /* Load up the value into the low element. We must ensure that the other - elements are valid floats -- zero is the easiest such value. */ - if (MEM_P (input)) - { - if (vecmode == V4SFmode) - emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); - else - emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); - } - else - { - input = gen_rtx_REG (vecmode, REGNO (input)); - emit_move_insn (value, CONST0_RTX (vecmode)); - if (vecmode == V4SFmode) - emit_insn (gen_sse_movss (value, value, input)); - else - emit_insn (gen_sse2_movsd (value, value, input)); - } - - emit_move_insn (large, two31); - emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31); - - x = gen_rtx_fmt_ee (LE, vecmode, large, value); - emit_insn (gen_rtx_SET (VOIDmode, large, x)); - - x = gen_rtx_AND (vecmode, zero_or_two31, large); - emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x)); - - x = gen_rtx_MINUS (vecmode, value, zero_or_two31); - emit_insn (gen_rtx_SET (VOIDmode, value, x)); - - large = gen_rtx_REG (V4SImode, REGNO (large)); - emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); - - x = gen_rtx_REG (V4SImode, REGNO (value)); - if (vecmode == V4SFmode) - emit_insn (gen_sse2_cvttps2dq (x, value)); - else - emit_insn (gen_sse2_cvttpd2dq (x, value)); - value = x; - - emit_insn (gen_xorv4si3 (value, value, large)); -} - -/* Convert an unsigned DImode value into a DFmode, using only SSE. - Expects the 64-bit DImode to be supplied in a pair of integral - registers. Requires SSE2; will use SSE3 if available. For x86_32, - -mfpmath=sse, !optimize_size only. */ - -void -ix86_expand_convert_uns_didf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; - rtx int_xmm, fp_xmm; - rtx biases, exponents; - rtx x; - - int_xmm = gen_reg_rtx (V4SImode); - if (TARGET_INTER_UNIT_MOVES) - emit_insn (gen_movdi_to_sse (int_xmm, input)); - else if (TARGET_SSE_SPLIT_REGS) - { - emit_clobber (int_xmm); - emit_move_insn (gen_lowpart (DImode, int_xmm), input); - } - else - { - x = gen_reg_rtx (V2DImode); - ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); - emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); - } - - x = gen_rtx_CONST_VECTOR (V4SImode, - gen_rtvec (4, GEN_INT (0x43300000UL), - GEN_INT (0x45300000UL), - const0_rtx, const0_rtx)); - exponents = validize_mem (force_const_mem (V4SImode, x)); - - /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ - emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents)); - - /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) - yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). - Similarly (0x45300000UL ## fp_value_hi_xmm) yields - (0x1.0p84 + double(fp_value_hi_xmm)). - Note these exponents differ by 32. */ - - fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); - - /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values - in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ - real_ldexp (&bias_lo_rvt, &dconst1, 52); - real_ldexp (&bias_hi_rvt, &dconst1, 84); - biases = const_double_from_real_value (bias_lo_rvt, DFmode); - x = const_double_from_real_value (bias_hi_rvt, DFmode); - biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); - biases = validize_mem (force_const_mem (V2DFmode, biases)); - emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); - - /* Add the upper and lower DFmode values together. */ - if (TARGET_SSE3) - emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); - else - { - x = copy_to_mode_reg (V2DFmode, fp_xmm); - emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm)); - emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); - } - - ix86_expand_vector_extract (false, target, fp_xmm, 0); -} - -/* Not used, but eases macroization of patterns. */ -void -ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED, - rtx input ATTRIBUTE_UNUSED) -{ - gcc_unreachable (); -} - -/* Convert an unsigned SImode value into a DFmode. Only currently used - for SSE, but applicable anywhere. */ - -void -ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE TWO31r; - rtx x, fp; - - x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), - NULL, 1, OPTAB_DIRECT); - - fp = gen_reg_rtx (DFmode); - emit_insn (gen_floatsidf2 (fp, x)); - - real_ldexp (&TWO31r, &dconst1, 31); - x = const_double_from_real_value (TWO31r, DFmode); - - x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); - if (x != target) - emit_move_insn (target, x); -} - -/* Convert a signed DImode value into a DFmode. Only used for SSE in - 32-bit mode; otherwise we have a direct convert instruction. */ - -void -ix86_expand_convert_sign_didf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE TWO32r; - rtx fp_lo, fp_hi, x; - - fp_lo = gen_reg_rtx (DFmode); - fp_hi = gen_reg_rtx (DFmode); - - emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); - - real_ldexp (&TWO32r, &dconst1, 32); - x = const_double_from_real_value (TWO32r, DFmode); - fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); - - ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); - - x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, - 0, OPTAB_DIRECT); - if (x != target) - emit_move_insn (target, x); -} - -/* Convert an unsigned SImode value into a SFmode, using only SSE. - For x86_32, -mfpmath=sse, !optimize_size only. */ -void -ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE ONE16r; - rtx fp_hi, fp_lo, int_hi, int_lo, x; - - real_ldexp (&ONE16r, &dconst1, 16); - x = const_double_from_real_value (ONE16r, SFmode); - int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), - NULL, 0, OPTAB_DIRECT); - int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), - NULL, 0, OPTAB_DIRECT); - fp_hi = gen_reg_rtx (SFmode); - fp_lo = gen_reg_rtx (SFmode); - emit_insn (gen_floatsisf2 (fp_hi, int_hi)); - emit_insn (gen_floatsisf2 (fp_lo, int_lo)); - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, - 0, OPTAB_DIRECT); - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, - 0, OPTAB_DIRECT); - if (!rtx_equal_p (target, fp_hi)) - emit_move_insn (target, fp_hi); -} - -/* A subroutine of ix86_build_signbit_mask_vector. If VECT is true, - then replicate the value for all elements of the vector - register. */ - -rtx -ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) -{ - rtvec v; - switch (mode) - { - case SImode: - gcc_assert (vect); - v = gen_rtvec (4, value, value, value, value); - return gen_rtx_CONST_VECTOR (V4SImode, v); - - case DImode: - gcc_assert (vect); - v = gen_rtvec (2, value, value); - return gen_rtx_CONST_VECTOR (V2DImode, v); - - case SFmode: - if (vect) - v = gen_rtvec (4, value, value, value, value); - else - v = gen_rtvec (4, value, CONST0_RTX (SFmode), - CONST0_RTX (SFmode), CONST0_RTX (SFmode)); - return gen_rtx_CONST_VECTOR (V4SFmode, v); - - case DFmode: - if (vect) - v = gen_rtvec (2, value, value); - else - v = gen_rtvec (2, value, CONST0_RTX (DFmode)); - return gen_rtx_CONST_VECTOR (V2DFmode, v); - - default: - gcc_unreachable (); - } -} - -/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders - and ix86_expand_int_vcond. Create a mask for the sign bit in MODE - for an SSE register. If VECT is true, then replicate the mask for - all elements of the vector register. If INVERT is true, then create - a mask excluding the sign bit. */ - -rtx -ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) -{ - enum machine_mode vec_mode, imode; - HOST_WIDE_INT hi, lo; - int shift = 63; - rtx v; - rtx mask; - - /* Find the sign bit, sign extended to 2*HWI. */ - switch (mode) - { - case SImode: - case SFmode: - imode = SImode; - vec_mode = (mode == SImode) ? V4SImode : V4SFmode; - lo = 0x80000000, hi = lo < 0; - break; - - case DImode: - case DFmode: - imode = DImode; - vec_mode = (mode == DImode) ? V2DImode : V2DFmode; - if (HOST_BITS_PER_WIDE_INT >= 64) - lo = (HOST_WIDE_INT)1 << shift, hi = -1; - else - lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); - break; - - case TImode: - case TFmode: - vec_mode = VOIDmode; - if (HOST_BITS_PER_WIDE_INT >= 64) - { - imode = TImode; - lo = 0, hi = (HOST_WIDE_INT)1 << shift; - } - else - { - rtvec vec; - - imode = DImode; - lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); - - if (invert) - { - lo = ~lo, hi = ~hi; - v = constm1_rtx; - } - else - v = const0_rtx; - - mask = immed_double_const (lo, hi, imode); - - vec = gen_rtvec (2, v, mask); - v = gen_rtx_CONST_VECTOR (V2DImode, vec); - v = copy_to_mode_reg (mode, gen_lowpart (mode, v)); - - return v; - } - break; - - default: - gcc_unreachable (); - } - - if (invert) - lo = ~lo, hi = ~hi; - - /* Force this value into the low part of a fp vector constant. */ - mask = immed_double_const (lo, hi, imode); - mask = gen_lowpart (mode, mask); - - if (vec_mode == VOIDmode) - return force_reg (mode, mask); - - v = ix86_build_const_vector (mode, vect, mask); - return force_reg (vec_mode, v); -} - -/* Generate code for floating point ABS or NEG. */ - -void -ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, - rtx operands[]) -{ - rtx mask, set, use, clob, dst, src; - bool use_sse = false; - bool vector_mode = VECTOR_MODE_P (mode); - enum machine_mode elt_mode = mode; - - if (vector_mode) - { - elt_mode = GET_MODE_INNER (mode); - use_sse = true; - } - else if (mode == TFmode) - use_sse = true; - else if (TARGET_SSE_MATH) - use_sse = SSE_FLOAT_MODE_P (mode); - - /* NEG and ABS performed with SSE use bitwise mask operations. - Create the appropriate mask now. */ - if (use_sse) - mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS); - else - mask = NULL_RTX; - - dst = operands[0]; - src = operands[1]; - - if (vector_mode) - { - set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask); - set = gen_rtx_SET (VOIDmode, dst, set); - emit_insn (set); - } - else - { - set = gen_rtx_fmt_e (code, mode, src); - set = gen_rtx_SET (VOIDmode, dst, set); - if (mask) - { - use = gen_rtx_USE (VOIDmode, mask); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (3, set, use, clob))); - } - else - emit_insn (set); - } -} - -/* Expand a copysign operation. Special case operand 0 being a constant. */ - -void -ix86_expand_copysign (rtx operands[]) -{ - enum machine_mode mode; - rtx dest, op0, op1, mask, nmask; - - dest = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - - mode = GET_MODE (dest); - - if (GET_CODE (op0) == CONST_DOUBLE) - { - rtx (*copysign_insn)(rtx, rtx, rtx, rtx); - - if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) - op0 = simplify_unary_operation (ABS, mode, op0, mode); - - if (mode == SFmode || mode == DFmode) - { - enum machine_mode vmode; - - vmode = mode == SFmode ? V4SFmode : V2DFmode; - - if (op0 == CONST0_RTX (mode)) - op0 = CONST0_RTX (vmode); - else - { - rtvec v; - - if (mode == SFmode) - v = gen_rtvec (4, op0, CONST0_RTX (SFmode), - CONST0_RTX (SFmode), CONST0_RTX (SFmode)); - else - v = gen_rtvec (2, op0, CONST0_RTX (DFmode)); - - op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v)); - } - } - else if (op0 != CONST0_RTX (mode)) - op0 = force_reg (mode, op0); - - mask = ix86_build_signbit_mask (mode, 0, 0); - - if (mode == SFmode) - copysign_insn = gen_copysignsf3_const; - else if (mode == DFmode) - copysign_insn = gen_copysigndf3_const; - else - copysign_insn = gen_copysigntf3_const; - - emit_insn (copysign_insn (dest, op0, op1, mask)); - } - else - { - rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); - - nmask = ix86_build_signbit_mask (mode, 0, 1); - mask = ix86_build_signbit_mask (mode, 0, 0); - - if (mode == SFmode) - copysign_insn = gen_copysignsf3_var; - else if (mode == DFmode) - copysign_insn = gen_copysigndf3_var; - else - copysign_insn = gen_copysigntf3_var; - - emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); - } -} - -/* Deconstruct a copysign operation into bit masks. Operand 0 is known to - be a constant, and so has already been expanded into a vector constant. */ - -void -ix86_split_copysign_const (rtx operands[]) -{ - enum machine_mode mode, vmode; - rtx dest, op0, op1, mask, x; - - dest = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - mask = operands[3]; - - mode = GET_MODE (dest); - vmode = GET_MODE (mask); - - dest = simplify_gen_subreg (vmode, dest, mode, 0); - x = gen_rtx_AND (vmode, dest, mask); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - - if (op0 != CONST0_RTX (vmode)) - { - x = gen_rtx_IOR (vmode, dest, op0); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } -} - -/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, - so we have to do two masks. */ - -void -ix86_split_copysign_var (rtx operands[]) -{ - enum machine_mode mode, vmode; - rtx dest, scratch, op0, op1, mask, nmask, x; - - dest = operands[0]; - scratch = operands[1]; - op0 = operands[2]; - op1 = operands[3]; - nmask = operands[4]; - mask = operands[5]; - - mode = GET_MODE (dest); - vmode = GET_MODE (mask); - - if (rtx_equal_p (op0, op1)) - { - /* Shouldn't happen often (it's useless, obviously), but when it does - we'd generate incorrect code if we continue below. */ - emit_move_insn (dest, op0); - return; - } - - if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ - { - gcc_assert (REGNO (op1) == REGNO (scratch)); - - x = gen_rtx_AND (vmode, scratch, mask); - emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); - - dest = mask; - op0 = simplify_gen_subreg (vmode, op0, mode, 0); - x = gen_rtx_NOT (vmode, dest); - x = gen_rtx_AND (vmode, x, op0); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - else - { - if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ - { - x = gen_rtx_AND (vmode, scratch, mask); - } - else /* alternative 2,4 */ - { - gcc_assert (REGNO (mask) == REGNO (scratch)); - op1 = simplify_gen_subreg (vmode, op1, mode, 0); - x = gen_rtx_AND (vmode, scratch, op1); - } - emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); - - if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ - { - dest = simplify_gen_subreg (vmode, op0, mode, 0); - x = gen_rtx_AND (vmode, dest, nmask); - } - else /* alternative 3,4 */ - { - gcc_assert (REGNO (nmask) == REGNO (dest)); - dest = nmask; - op0 = simplify_gen_subreg (vmode, op0, mode, 0); - x = gen_rtx_AND (vmode, dest, op0); - } - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - - x = gen_rtx_IOR (vmode, dest, scratch); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); -} - -/* Return TRUE or FALSE depending on whether the first SET in INSN - has source and destination with matching CC modes, and that the - CC mode is at least as constrained as REQ_MODE. */ - -int -ix86_match_ccmode (rtx insn, enum machine_mode req_mode) -{ - rtx set; - enum machine_mode set_mode; - - set = PATTERN (insn); - if (GET_CODE (set) == PARALLEL) - set = XVECEXP (set, 0, 0); - gcc_assert (GET_CODE (set) == SET); - gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); - - set_mode = GET_MODE (SET_DEST (set)); - switch (set_mode) - { - case CCNOmode: - if (req_mode != CCNOmode - && (req_mode != CCmode - || XEXP (SET_SRC (set), 1) != const0_rtx)) - return 0; - break; - case CCmode: - if (req_mode == CCGCmode) - return 0; - /* FALLTHRU */ - case CCGCmode: - if (req_mode == CCGOCmode || req_mode == CCNOmode) - return 0; - /* FALLTHRU */ - case CCGOCmode: - if (req_mode == CCZmode) - return 0; - /* FALLTHRU */ - case CCAmode: - case CCCmode: - case CCOmode: - case CCSmode: - case CCZmode: - break; - - default: - gcc_unreachable (); - } - - return (GET_MODE (SET_SRC (set)) == set_mode); -} - -/* Generate insn patterns to do an integer compare of OPERANDS. */ - -static rtx -ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) -{ - enum machine_mode cmpmode; - rtx tmp, flags; - - cmpmode = SELECT_CC_MODE (code, op0, op1); - flags = gen_rtx_REG (cmpmode, FLAGS_REG); - - /* This is very simple, but making the interface the same as in the - FP case makes the rest of the code easier. */ - tmp = gen_rtx_COMPARE (cmpmode, op0, op1); - emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); - - /* Return the test that should be put into the flags user, i.e. - the bcc, scc, or cmov instruction. */ - return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); -} - -/* Figure out whether to use ordered or unordered fp comparisons. - Return the appropriate mode to use. */ - -enum machine_mode -ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) -{ - /* ??? In order to make all comparisons reversible, we do all comparisons - non-trapping when compiling for IEEE. Once gcc is able to distinguish - all forms trapping and nontrapping comparisons, we can make inequality - comparisons trapping again, since it results in better code when using - FCOM based compares. */ - return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; -} - -enum machine_mode -ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) -{ - enum machine_mode mode = GET_MODE (op0); - - if (SCALAR_FLOAT_MODE_P (mode)) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); - return ix86_fp_compare_mode (code); - } - - switch (code) - { - /* Only zero flag is needed. */ - case EQ: /* ZF=0 */ - case NE: /* ZF!=0 */ - return CCZmode; - /* Codes needing carry flag. */ - case GEU: /* CF=0 */ - case LTU: /* CF=1 */ - /* Detect overflow checks. They need just the carry flag. */ - if (GET_CODE (op0) == PLUS - && rtx_equal_p (op1, XEXP (op0, 0))) - return CCCmode; - else - return CCmode; - case GTU: /* CF=0 & ZF=0 */ - case LEU: /* CF=1 | ZF=1 */ - /* Detect overflow checks. They need just the carry flag. */ - if (GET_CODE (op0) == MINUS - && rtx_equal_p (op1, XEXP (op0, 0))) - return CCCmode; - else - return CCmode; - /* Codes possibly doable only with sign flag when - comparing against zero. */ - case GE: /* SF=OF or SF=0 */ - case LT: /* SF<>OF or SF=1 */ - if (op1 == const0_rtx) - return CCGOCmode; - else - /* For other cases Carry flag is not required. */ - return CCGCmode; - /* Codes doable only with sign flag when comparing - against zero, but we miss jump instruction for it - so we need to use relational tests against overflow - that thus needs to be zero. */ - case GT: /* ZF=0 & SF=OF */ - case LE: /* ZF=1 | SF<>OF */ - if (op1 == const0_rtx) - return CCNOmode; - else - return CCGCmode; - /* strcmp pattern do (use flags) and combine may ask us for proper - mode. */ - case USE: - return CCmode; - default: - gcc_unreachable (); - } -} - -/* Return the fixed registers used for condition codes. */ - -static bool -ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) -{ - *p1 = FLAGS_REG; - *p2 = FPSR_REG; - return true; -} - -/* If two condition code modes are compatible, return a condition code - mode which is compatible with both. Otherwise, return - VOIDmode. */ - -static enum machine_mode -ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) -{ - if (m1 == m2) - return m1; - - if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) - return VOIDmode; - - if ((m1 == CCGCmode && m2 == CCGOCmode) - || (m1 == CCGOCmode && m2 == CCGCmode)) - return CCGCmode; - - switch (m1) - { - default: - gcc_unreachable (); - - case CCmode: - case CCGCmode: - case CCGOCmode: - case CCNOmode: - case CCAmode: - case CCCmode: - case CCOmode: - case CCSmode: - case CCZmode: - switch (m2) - { - default: - return VOIDmode; - - case CCmode: - case CCGCmode: - case CCGOCmode: - case CCNOmode: - case CCAmode: - case CCCmode: - case CCOmode: - case CCSmode: - case CCZmode: - return CCmode; - } - - case CCFPmode: - case CCFPUmode: - /* These are only compatible with themselves, which we already - checked above. */ - return VOIDmode; - } -} - -/* Split comparison code CODE into comparisons we can do using branch - instructions. BYPASS_CODE is comparison code for branch that will - branch around FIRST_CODE and SECOND_CODE. If some of branches - is not required, set value to UNKNOWN. - We never require more than two branches. */ - -void -ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, - enum rtx_code *first_code, - enum rtx_code *second_code) -{ - *first_code = code; - *bypass_code = UNKNOWN; - *second_code = UNKNOWN; - - /* The fcomi comparison sets flags as follows: - - cmp ZF PF CF - > 0 0 0 - < 0 0 1 - = 1 0 0 - un 1 1 1 */ - - switch (code) - { - case GT: /* GTU - CF=0 & ZF=0 */ - case GE: /* GEU - CF=0 */ - case ORDERED: /* PF=0 */ - case UNORDERED: /* PF=1 */ - case UNEQ: /* EQ - ZF=1 */ - case UNLT: /* LTU - CF=1 */ - case UNLE: /* LEU - CF=1 | ZF=1 */ - case LTGT: /* EQ - ZF=0 */ - break; - case LT: /* LTU - CF=1 - fails on unordered */ - *first_code = UNLT; - *bypass_code = UNORDERED; - break; - case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ - *first_code = UNLE; - *bypass_code = UNORDERED; - break; - case EQ: /* EQ - ZF=1 - fails on unordered */ - *first_code = UNEQ; - *bypass_code = UNORDERED; - break; - case NE: /* NE - ZF=0 - fails on unordered */ - *first_code = LTGT; - *second_code = UNORDERED; - break; - case UNGE: /* GEU - CF=0 - fails on unordered */ - *first_code = GE; - *second_code = UNORDERED; - break; - case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ - *first_code = GT; - *second_code = UNORDERED; - break; - default: - gcc_unreachable (); - } - if (!TARGET_IEEE_FP) - { - *second_code = UNKNOWN; - *bypass_code = UNKNOWN; - } -} - -/* Return cost of comparison done fcom + arithmetics operations on AX. - All following functions do use number of instructions as a cost metrics. - In future this should be tweaked to compute bytes for optimize_size and - take into account performance of various instructions on various CPUs. */ -static int -ix86_fp_comparison_arithmetics_cost (enum rtx_code code) -{ - if (!TARGET_IEEE_FP) - return 4; - /* The cost of code output by ix86_expand_fp_compare. */ - switch (code) - { - case UNLE: - case UNLT: - case LTGT: - case GT: - case GE: - case UNORDERED: - case ORDERED: - case UNEQ: - return 4; - break; - case LT: - case NE: - case EQ: - case UNGE: - return 5; - break; - case LE: - case UNGT: - return 6; - break; - default: - gcc_unreachable (); - } -} - -/* Return cost of comparison done using fcomi operation. - See ix86_fp_comparison_arithmetics_cost for the metrics. */ -static int -ix86_fp_comparison_fcomi_cost (enum rtx_code code) -{ - enum rtx_code bypass_code, first_code, second_code; - /* Return arbitrarily high cost when instruction is not supported - this - prevents gcc from using it. */ - if (!TARGET_CMOVE) - return 1024; - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); - return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; -} - -/* Return cost of comparison done using sahf operation. - See ix86_fp_comparison_arithmetics_cost for the metrics. */ -static int -ix86_fp_comparison_sahf_cost (enum rtx_code code) -{ - enum rtx_code bypass_code, first_code, second_code; - /* Return arbitrarily high cost when instruction is not preferred - this - avoids gcc from using it. */ - if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))) - return 1024; - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); - return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; -} - -/* Compute cost of the comparison done using any method. - See ix86_fp_comparison_arithmetics_cost for the metrics. */ -static int -ix86_fp_comparison_cost (enum rtx_code code) -{ - int fcomi_cost, sahf_cost, arithmetics_cost = 1024; - int min; - - fcomi_cost = ix86_fp_comparison_fcomi_cost (code); - sahf_cost = ix86_fp_comparison_sahf_cost (code); - - min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); - if (min > sahf_cost) - min = sahf_cost; - if (min > fcomi_cost) - min = fcomi_cost; - return min; -} - -/* Return true if we should use an FCOMI instruction for this - fp comparison. */ - -int -ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED) -{ - enum rtx_code swapped_code = swap_condition (code); - - return ((ix86_fp_comparison_cost (code) - == ix86_fp_comparison_fcomi_cost (code)) - || (ix86_fp_comparison_cost (swapped_code) - == ix86_fp_comparison_fcomi_cost (swapped_code))); -} - -/* Swap, force into registers, or otherwise massage the two operands - to a fp comparison. The operands are updated in place; the new - comparison code is returned. */ - -static enum rtx_code -ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) -{ - enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); - rtx op0 = *pop0, op1 = *pop1; - enum machine_mode op_mode = GET_MODE (op0); - int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); - - /* All of the unordered compare instructions only work on registers. - The same is true of the fcomi compare instructions. The XFmode - compare instructions require registers except when comparing - against zero or when converting operand 1 from fixed point to - floating point. */ - - if (!is_sse - && (fpcmp_mode == CCFPUmode - || (op_mode == XFmode - && ! (standard_80387_constant_p (op0) == 1 - || standard_80387_constant_p (op1) == 1) - && GET_CODE (op1) != FLOAT) - || ix86_use_fcomi_compare (code))) - { - op0 = force_reg (op_mode, op0); - op1 = force_reg (op_mode, op1); - } - else - { - /* %%% We only allow op1 in memory; op0 must be st(0). So swap - things around if they appear profitable, otherwise force op0 - into a register. */ - - if (standard_80387_constant_p (op0) == 0 - || (MEM_P (op0) - && ! (standard_80387_constant_p (op1) == 0 - || MEM_P (op1)))) - { - rtx tmp; - tmp = op0, op0 = op1, op1 = tmp; - code = swap_condition (code); - } - - if (!REG_P (op0)) - op0 = force_reg (op_mode, op0); - - if (CONSTANT_P (op1)) - { - int tmp = standard_80387_constant_p (op1); - if (tmp == 0) - op1 = validize_mem (force_const_mem (op_mode, op1)); - else if (tmp == 1) - { - if (TARGET_CMOVE) - op1 = force_reg (op_mode, op1); - } - else - op1 = force_reg (op_mode, op1); - } - } - - /* Try to rearrange the comparison to make it cheaper. */ - if (ix86_fp_comparison_cost (code) - > ix86_fp_comparison_cost (swap_condition (code)) - && (REG_P (op1) || can_create_pseudo_p ())) - { - rtx tmp; - tmp = op0, op0 = op1, op1 = tmp; - code = swap_condition (code); - if (!REG_P (op0)) - op0 = force_reg (op_mode, op0); - } - - *pop0 = op0; - *pop1 = op1; - return code; -} - -/* Convert comparison codes we use to represent FP comparison to integer - code that will result in proper branch. Return UNKNOWN if no such code - is available. */ - -enum rtx_code -ix86_fp_compare_code_to_integer (enum rtx_code code) -{ - switch (code) - { - case GT: - return GTU; - case GE: - return GEU; - case ORDERED: - case UNORDERED: - return code; - break; - case UNEQ: - return EQ; - break; - case UNLT: - return LTU; - break; - case UNLE: - return LEU; - break; - case LTGT: - return NE; - break; - default: - return UNKNOWN; - } -} - -/* Generate insn patterns to do a floating point compare of OPERANDS. */ - -static rtx -ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, - rtx *second_test, rtx *bypass_test) -{ - enum machine_mode fpcmp_mode, intcmp_mode; - rtx tmp, tmp2; - int cost = ix86_fp_comparison_cost (code); - enum rtx_code bypass_code, first_code, second_code; - - fpcmp_mode = ix86_fp_compare_mode (code); - code = ix86_prepare_fp_compare_args (code, &op0, &op1); - - if (second_test) - *second_test = NULL_RTX; - if (bypass_test) - *bypass_test = NULL_RTX; - - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); - - /* Do fcomi/sahf based test when profitable. */ - if (ix86_fp_comparison_arithmetics_cost (code) > cost - && (bypass_code == UNKNOWN || bypass_test) - && (second_code == UNKNOWN || second_test)) - { - tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); - tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), - tmp); - if (TARGET_CMOVE) - emit_insn (tmp); - else - { - gcc_assert (TARGET_SAHF); - - if (!scratch) - scratch = gen_reg_rtx (HImode); - tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch); - - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2))); - } - - /* The FP codes work out to act like unsigned. */ - intcmp_mode = fpcmp_mode; - code = first_code; - if (bypass_code != UNKNOWN) - *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, - gen_rtx_REG (intcmp_mode, FLAGS_REG), - const0_rtx); - if (second_code != UNKNOWN) - *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, - gen_rtx_REG (intcmp_mode, FLAGS_REG), - const0_rtx); - } - else - { - /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ - tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); - tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); - if (!scratch) - scratch = gen_reg_rtx (HImode); - emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); - - /* In the unordered case, we have to check C2 for NaN's, which - doesn't happen to work out to anything nice combination-wise. - So do some bit twiddling on the value we've got in AH to come - up with an appropriate set of condition codes. */ - - intcmp_mode = CCNOmode; - switch (code) - { - case GT: - case UNGT: - if (code == GT || !TARGET_IEEE_FP) - { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); - code = EQ; - } - else - { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); - intcmp_mode = CCmode; - code = GEU; - } - break; - case LT: - case UNLT: - if (code == LT && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); - intcmp_mode = CCmode; - code = EQ; - } - else - { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); - code = NE; - } - break; - case GE: - case UNGE: - if (code == GE || !TARGET_IEEE_FP) - { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); - code = EQ; - } - else - { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, - GEN_INT (0x01))); - code = NE; - } - break; - case LE: - case UNLE: - if (code == LE && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); - intcmp_mode = CCmode; - code = LTU; - } - else - { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); - code = NE; - } - break; - case EQ: - case UNEQ: - if (code == EQ && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); - intcmp_mode = CCmode; - code = EQ; - } - else - { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); - code = NE; - break; - } - break; - case NE: - case LTGT: - if (code == NE && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, - GEN_INT (0x40))); - code = NE; - } - else - { - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); - code = EQ; - } - break; - - case UNORDERED: - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); - code = NE; - break; - case ORDERED: - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); - code = EQ; - break; - - default: - gcc_unreachable (); - } - } - - /* Return the test that should be put into the flags user, i.e. - the bcc, scc, or cmov instruction. */ - return gen_rtx_fmt_ee (code, VOIDmode, - gen_rtx_REG (intcmp_mode, FLAGS_REG), - const0_rtx); -} - -rtx -ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) -{ - rtx op0, op1, ret; - op0 = ix86_compare_op0; - op1 = ix86_compare_op1; - - if (second_test) - *second_test = NULL_RTX; - if (bypass_test) - *bypass_test = NULL_RTX; - - if (ix86_compare_emitted) - { - ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx); - ix86_compare_emitted = NULL_RTX; - } - else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); - ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, - second_test, bypass_test); - } - else - ret = ix86_expand_int_compare (code, op0, op1); - - return ret; -} - -/* Return true if the CODE will result in nontrivial jump sequence. */ -bool -ix86_fp_jump_nontrivial_p (enum rtx_code code) -{ - enum rtx_code bypass_code, first_code, second_code; - if (!TARGET_CMOVE) - return true; - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); - return bypass_code != UNKNOWN || second_code != UNKNOWN; -} - -void -ix86_expand_branch (enum rtx_code code, rtx label) -{ - rtx tmp; - - /* If we have emitted a compare insn, go straight to simple. - ix86_expand_compare won't emit anything if ix86_compare_emitted - is non NULL. */ - if (ix86_compare_emitted) - goto simple; - - switch (GET_MODE (ix86_compare_op0)) - { - case QImode: - case HImode: - case SImode: - simple: - tmp = ix86_expand_compare (code, NULL, NULL); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx); - emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); - return; - - case SFmode: - case DFmode: - case XFmode: - { - rtvec vec; - int use_fcomi; - enum rtx_code bypass_code, first_code, second_code; - - code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, - &ix86_compare_op1); - - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); - - /* Check whether we will use the natural sequence with one jump. If - so, we can expand jump early. Otherwise delay expansion by - creating compound insn to not confuse optimizers. */ - if (bypass_code == UNKNOWN && second_code == UNKNOWN) - { - ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx, NULL_RTX, NULL_RTX); - } - else - { - tmp = gen_rtx_fmt_ee (code, VOIDmode, - ix86_compare_op0, ix86_compare_op1); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx); - tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); - - use_fcomi = ix86_use_fcomi_compare (code); - vec = rtvec_alloc (3 + !use_fcomi); - RTVEC_ELT (vec, 0) = tmp; - RTVEC_ELT (vec, 1) - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG)); - RTVEC_ELT (vec, 2) - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG)); - if (! use_fcomi) - RTVEC_ELT (vec, 3) - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); - - emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); - } - return; - } - - case DImode: - if (TARGET_64BIT) - goto simple; - case TImode: - /* Expand DImode branch into multiple compare+branch. */ - { - rtx lo[2], hi[2], label2; - enum rtx_code code1, code2, code3; - enum machine_mode submode; - - if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) - { - tmp = ix86_compare_op0; - ix86_compare_op0 = ix86_compare_op1; - ix86_compare_op1 = tmp; - code = swap_condition (code); - } - if (GET_MODE (ix86_compare_op0) == DImode) - { - split_di (&ix86_compare_op0, 1, lo+0, hi+0); - split_di (&ix86_compare_op1, 1, lo+1, hi+1); - submode = SImode; - } - else - { - split_ti (&ix86_compare_op0, 1, lo+0, hi+0); - split_ti (&ix86_compare_op1, 1, lo+1, hi+1); - submode = DImode; - } - - /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to - avoid two branches. This costs one extra insn, so disable when - optimizing for size. */ - - if ((code == EQ || code == NE) - && (!optimize_insn_for_size_p () - || hi[1] == const0_rtx || lo[1] == const0_rtx)) - { - rtx xor0, xor1; - - xor1 = hi[0]; - if (hi[1] != const0_rtx) - xor1 = expand_binop (submode, xor_optab, xor1, hi[1], - NULL_RTX, 0, OPTAB_WIDEN); - - xor0 = lo[0]; - if (lo[1] != const0_rtx) - xor0 = expand_binop (submode, xor_optab, xor0, lo[1], - NULL_RTX, 0, OPTAB_WIDEN); - - tmp = expand_binop (submode, ior_optab, xor1, xor0, - NULL_RTX, 0, OPTAB_WIDEN); - - ix86_compare_op0 = tmp; - ix86_compare_op1 = const0_rtx; - ix86_expand_branch (code, label); - return; - } - - /* Otherwise, if we are doing less-than or greater-or-equal-than, - op1 is a constant and the low word is zero, then we can just - examine the high word. Similarly for low word -1 and - less-or-equal-than or greater-than. */ - - if (CONST_INT_P (hi[1])) - switch (code) - { - case LT: case LTU: case GE: case GEU: - if (lo[1] == const0_rtx) - { - ix86_compare_op0 = hi[0]; - ix86_compare_op1 = hi[1]; - ix86_expand_branch (code, label); - return; - } - break; - case LE: case LEU: case GT: case GTU: - if (lo[1] == constm1_rtx) - { - ix86_compare_op0 = hi[0]; - ix86_compare_op1 = hi[1]; - ix86_expand_branch (code, label); - return; - } - break; - default: - break; - } - - /* Otherwise, we need two or three jumps. */ - - label2 = gen_label_rtx (); - - code1 = code; - code2 = swap_condition (code); - code3 = unsigned_condition (code); - - switch (code) - { - case LT: case GT: case LTU: case GTU: - break; - - case LE: code1 = LT; code2 = GT; break; - case GE: code1 = GT; code2 = LT; break; - case LEU: code1 = LTU; code2 = GTU; break; - case GEU: code1 = GTU; code2 = LTU; break; - - case EQ: code1 = UNKNOWN; code2 = NE; break; - case NE: code2 = UNKNOWN; break; - - default: - gcc_unreachable (); - } - - /* - * a < b => - * if (hi(a) < hi(b)) goto true; - * if (hi(a) > hi(b)) goto false; - * if (lo(a) < lo(b)) goto true; - * false: - */ - - ix86_compare_op0 = hi[0]; - ix86_compare_op1 = hi[1]; - - if (code1 != UNKNOWN) - ix86_expand_branch (code1, label); - if (code2 != UNKNOWN) - ix86_expand_branch (code2, label2); - - ix86_compare_op0 = lo[0]; - ix86_compare_op1 = lo[1]; - ix86_expand_branch (code3, label); - - if (code2 != UNKNOWN) - emit_label (label2); - return; - } - - default: - gcc_unreachable (); - } -} - -/* Split branch based on floating point condition. */ -void -ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, - rtx target1, rtx target2, rtx tmp, rtx pushed) -{ - rtx second, bypass; - rtx label = NULL_RTX; - rtx condition; - int bypass_probability = -1, second_probability = -1, probability = -1; - rtx i; - - if (target2 != pc_rtx) - { - rtx tmp = target2; - code = reverse_condition_maybe_unordered (code); - target2 = target1; - target1 = tmp; - } - - condition = ix86_expand_fp_compare (code, op1, op2, - tmp, &second, &bypass); - - /* Remove pushed operand from stack. */ - if (pushed) - ix86_free_from_memory (GET_MODE (pushed)); - - if (split_branch_probability >= 0) - { - /* Distribute the probabilities across the jumps. - Assume the BYPASS and SECOND to be always test - for UNORDERED. */ - probability = split_branch_probability; - - /* Value of 1 is low enough to make no need for probability - to be updated. Later we may run some experiments and see - if unordered values are more frequent in practice. */ - if (bypass) - bypass_probability = 1; - if (second) - second_probability = 1; - } - if (bypass != NULL_RTX) - { - label = gen_label_rtx (); - i = emit_jump_insn (gen_rtx_SET - (VOIDmode, pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, - bypass, - gen_rtx_LABEL_REF (VOIDmode, - label), - pc_rtx))); - if (bypass_probability >= 0) - REG_NOTES (i) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (bypass_probability), - REG_NOTES (i)); - } - i = emit_jump_insn (gen_rtx_SET - (VOIDmode, pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, - condition, target1, target2))); - if (probability >= 0) - REG_NOTES (i) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (probability), - REG_NOTES (i)); - if (second != NULL_RTX) - { - i = emit_jump_insn (gen_rtx_SET - (VOIDmode, pc_rtx, - gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, - target2))); - if (second_probability >= 0) - REG_NOTES (i) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (second_probability), - REG_NOTES (i)); - } - if (label != NULL_RTX) - emit_label (label); -} - -int -ix86_expand_setcc (enum rtx_code code, rtx dest) -{ - rtx ret, tmp, tmpreg, equiv; - rtx second_test, bypass_test; - - if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode)) - return 0; /* FAIL */ - - gcc_assert (GET_MODE (dest) == QImode); - - ret = ix86_expand_compare (code, &second_test, &bypass_test); - PUT_MODE (ret, QImode); - - tmp = dest; - tmpreg = dest; - - emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); - if (bypass_test || second_test) - { - rtx test = second_test; - int bypass = 0; - rtx tmp2 = gen_reg_rtx (QImode); - if (bypass_test) - { - gcc_assert (!second_test); - test = bypass_test; - bypass = 1; - PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); - } - PUT_MODE (test, QImode); - emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); - - if (bypass) - emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); - else - emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); - } - - /* Attach a REG_EQUAL note describing the comparison result. */ - if (ix86_compare_op0 && ix86_compare_op1) - { - equiv = simplify_gen_relational (code, QImode, - GET_MODE (ix86_compare_op0), - ix86_compare_op0, ix86_compare_op1); - set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); - } - - return 1; /* DONE */ -} - -/* Expand comparison setting or clearing carry flag. Return true when - successful and set pop for the operation. */ -static bool -ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) -{ - enum machine_mode mode = - GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); - - /* Do not handle DImode compares that go through special path. */ - if (mode == (TARGET_64BIT ? TImode : DImode)) - return false; - - if (SCALAR_FLOAT_MODE_P (mode)) - { - rtx second_test = NULL, bypass_test = NULL; - rtx compare_op, compare_seq; - - gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); - - /* Shortcut: following common codes never translate - into carry flag compares. */ - if (code == EQ || code == NE || code == UNEQ || code == LTGT - || code == ORDERED || code == UNORDERED) - return false; - - /* These comparisons require zero flag; swap operands so they won't. */ - if ((code == GT || code == UNLE || code == LE || code == UNGT) - && !TARGET_IEEE_FP) - { - rtx tmp = op0; - op0 = op1; - op1 = tmp; - code = swap_condition (code); - } - - /* Try to expand the comparison and verify that we end up with - carry flag based comparison. This fails to be true only when - we decide to expand comparison using arithmetic that is not - too common scenario. */ - start_sequence (); - compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, - &second_test, &bypass_test); - compare_seq = get_insns (); - end_sequence (); - - if (second_test || bypass_test) - return false; - - if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode - || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) - code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); - else - code = GET_CODE (compare_op); - - if (code != LTU && code != GEU) - return false; - - emit_insn (compare_seq); - *pop = compare_op; - return true; - } - - if (!INTEGRAL_MODE_P (mode)) - return false; - - switch (code) - { - case LTU: - case GEU: - break; - - /* Convert a==0 into (unsigned)a<1. */ - case EQ: - case NE: - if (op1 != const0_rtx) - return false; - op1 = const1_rtx; - code = (code == EQ ? LTU : GEU); - break; - - /* Convert a>b into b<a or a>=b-1. */ - case GTU: - case LEU: - if (CONST_INT_P (op1)) - { - op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); - /* Bail out on overflow. We still can swap operands but that - would force loading of the constant into register. */ - if (op1 == const0_rtx - || !x86_64_immediate_operand (op1, GET_MODE (op1))) - return false; - code = (code == GTU ? GEU : LTU); - } - else - { - rtx tmp = op1; - op1 = op0; - op0 = tmp; - code = (code == GTU ? LTU : GEU); - } - break; - - /* Convert a>=0 into (unsigned)a<0x80000000. */ - case LT: - case GE: - if (mode == DImode || op1 != const0_rtx) - return false; - op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); - code = (code == LT ? GEU : LTU); - break; - case LE: - case GT: - if (mode == DImode || op1 != constm1_rtx) - return false; - op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); - code = (code == LE ? GEU : LTU); - break; - - default: - return false; - } - /* Swapping operands may cause constant to appear as first operand. */ - if (!nonimmediate_operand (op0, VOIDmode)) - { - if (!can_create_pseudo_p ()) - return false; - op0 = force_reg (mode, op0); - } - ix86_compare_op0 = op0; - ix86_compare_op1 = op1; - *pop = ix86_expand_compare (code, NULL, NULL); - gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); - return true; -} - -int -ix86_expand_int_movcc (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[1]), compare_code; - rtx compare_seq, compare_op; - rtx second_test, bypass_test; - enum machine_mode mode = GET_MODE (operands[0]); - bool sign_bit_compare_p = false;; - - start_sequence (); - compare_op = ix86_expand_compare (code, &second_test, &bypass_test); - compare_seq = get_insns (); - end_sequence (); - - compare_code = GET_CODE (compare_op); - - if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) - || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) - sign_bit_compare_p = true; - - /* Don't attempt mode expansion here -- if we had to expand 5 or 6 - HImode insns, we'd be swallowed in word prefix ops. */ - - if ((mode != HImode || TARGET_FAST_PREFIX) - && (mode != (TARGET_64BIT ? TImode : DImode)) - && CONST_INT_P (operands[2]) - && CONST_INT_P (operands[3])) - { - rtx out = operands[0]; - HOST_WIDE_INT ct = INTVAL (operands[2]); - HOST_WIDE_INT cf = INTVAL (operands[3]); - HOST_WIDE_INT diff; - - diff = ct - cf; - /* Sign bit compares are better done using shifts than we do by using - sbb. */ - if (sign_bit_compare_p - || ix86_expand_carry_flag_compare (code, ix86_compare_op0, - ix86_compare_op1, &compare_op)) - { - /* Detect overlap between destination and compare sources. */ - rtx tmp = out; - - if (!sign_bit_compare_p) - { - bool fpcmp = false; - - compare_code = GET_CODE (compare_op); - - if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode - || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) - { - fpcmp = true; - compare_code = ix86_fp_compare_code_to_integer (compare_code); - } - - /* To simplify rest of code, restrict to the GEU case. */ - if (compare_code == LTU) - { - HOST_WIDE_INT tmp = ct; - ct = cf; - cf = tmp; - compare_code = reverse_condition (compare_code); - code = reverse_condition (code); - } - else - { - if (fpcmp) - PUT_CODE (compare_op, - reverse_condition_maybe_unordered - (GET_CODE (compare_op))); - else - PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); - } - diff = ct - cf; - - if (reg_overlap_mentioned_p (out, ix86_compare_op0) - || reg_overlap_mentioned_p (out, ix86_compare_op1)) - tmp = gen_reg_rtx (mode); - - if (mode == DImode) - emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); - else - emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); - } - else - { - if (code == GT || code == GE) - code = reverse_condition (code); - else - { - HOST_WIDE_INT tmp = ct; - ct = cf; - cf = tmp; - diff = ct - cf; - } - tmp = emit_store_flag (tmp, code, ix86_compare_op0, - ix86_compare_op1, VOIDmode, 0, -1); - } - - if (diff == 1) - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * [addl dest, ct] - * - * Size 5 - 8. - */ - if (ct) - tmp = expand_simple_binop (mode, PLUS, - tmp, GEN_INT (ct), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - else if (cf == -1) - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * orl $ct, dest - * - * Size 8. - */ - tmp = expand_simple_binop (mode, IOR, - tmp, GEN_INT (ct), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - else if (diff == -1 && ct) - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * notl dest - * [addl dest, cf] - * - * Size 8 - 11. - */ - tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); - if (cf) - tmp = expand_simple_binop (mode, PLUS, - copy_rtx (tmp), GEN_INT (cf), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - else - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * [notl dest] - * andl cf - ct, dest - * [addl dest, ct] - * - * Size 8 - 11. - */ - - if (cf == 0) - { - cf = ct; - ct = 0; - tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); - } - - tmp = expand_simple_binop (mode, AND, - copy_rtx (tmp), - gen_int_mode (cf - ct, mode), - copy_rtx (tmp), 1, OPTAB_DIRECT); - if (ct) - tmp = expand_simple_binop (mode, PLUS, - copy_rtx (tmp), GEN_INT (ct), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - - if (!rtx_equal_p (tmp, out)) - emit_move_insn (copy_rtx (out), copy_rtx (tmp)); - - return 1; /* DONE */ - } - - if (diff < 0) - { - enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0); - - HOST_WIDE_INT tmp; - tmp = ct, ct = cf, cf = tmp; - diff = -diff; - - if (SCALAR_FLOAT_MODE_P (cmp_mode)) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); - - /* We may be reversing unordered compare to normal compare, that - is not valid in general (we may convert non-trapping condition - to trapping one), however on i386 we currently emit all - comparisons unordered. */ - compare_code = reverse_condition_maybe_unordered (compare_code); - code = reverse_condition_maybe_unordered (code); - } - else - { - compare_code = reverse_condition (compare_code); - code = reverse_condition (code); - } - } - - compare_code = UNKNOWN; - if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT - && CONST_INT_P (ix86_compare_op1)) - { - if (ix86_compare_op1 == const0_rtx - && (code == LT || code == GE)) - compare_code = code; - else if (ix86_compare_op1 == constm1_rtx) - { - if (code == LE) - compare_code = LT; - else if (code == GT) - compare_code = GE; - } - } - - /* Optimize dest = (op0 < 0) ? -1 : cf. */ - if (compare_code != UNKNOWN - && GET_MODE (ix86_compare_op0) == GET_MODE (out) - && (cf == -1 || ct == -1)) - { - /* If lea code below could be used, only optimize - if it results in a 2 insn sequence. */ - - if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 - || diff == 3 || diff == 5 || diff == 9) - || (compare_code == LT && ct == -1) - || (compare_code == GE && cf == -1)) - { - /* - * notl op1 (if necessary) - * sarl $31, op1 - * orl cf, op1 - */ - if (ct != -1) - { - cf = ct; - ct = -1; - code = reverse_condition (code); - } - - out = emit_store_flag (out, code, ix86_compare_op0, - ix86_compare_op1, VOIDmode, 0, -1); - - out = expand_simple_binop (mode, IOR, - out, GEN_INT (cf), - out, 1, OPTAB_DIRECT); - if (out != operands[0]) - emit_move_insn (operands[0], out); - - return 1; /* DONE */ - } - } - - - if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 - || diff == 3 || diff == 5 || diff == 9) - && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) - && (mode != DImode - || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) - { - /* - * xorl dest,dest - * cmpl op1,op2 - * setcc dest - * lea cf(dest*(ct-cf)),dest - * - * Size 14. - * - * This also catches the degenerate setcc-only case. - */ - - rtx tmp; - int nops; - - out = emit_store_flag (out, code, ix86_compare_op0, - ix86_compare_op1, VOIDmode, 0, 1); - - nops = 0; - /* On x86_64 the lea instruction operates on Pmode, so we need - to get arithmetics done in proper mode to match. */ - if (diff == 1) - tmp = copy_rtx (out); - else - { - rtx out1; - out1 = copy_rtx (out); - tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); - nops++; - if (diff & 1) - { - tmp = gen_rtx_PLUS (mode, tmp, out1); - nops++; - } - } - if (cf != 0) - { - tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); - nops++; - } - if (!rtx_equal_p (tmp, out)) - { - if (nops == 1) - out = force_operand (tmp, copy_rtx (out)); - else - emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); - } - if (!rtx_equal_p (out, operands[0])) - emit_move_insn (operands[0], copy_rtx (out)); - - return 1; /* DONE */ - } - - /* - * General case: Jumpful: - * xorl dest,dest cmpl op1, op2 - * cmpl op1, op2 movl ct, dest - * setcc dest jcc 1f - * decl dest movl cf, dest - * andl (cf-ct),dest 1: - * addl ct,dest - * - * Size 20. Size 14. - * - * This is reasonably steep, but branch mispredict costs are - * high on modern cpus, so consider failing only if optimizing - * for space. - */ - - if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) - && BRANCH_COST (optimize_insn_for_speed_p (), - false) >= 2) - { - if (cf == 0) - { - enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0); - - cf = ct; - ct = 0; - - if (SCALAR_FLOAT_MODE_P (cmp_mode)) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); - - /* We may be reversing unordered compare to normal compare, - that is not valid in general (we may convert non-trapping - condition to trapping one), however on i386 we currently - emit all comparisons unordered. */ - code = reverse_condition_maybe_unordered (code); - } - else - { - code = reverse_condition (code); - if (compare_code != UNKNOWN) - compare_code = reverse_condition (compare_code); - } - } - - if (compare_code != UNKNOWN) - { - /* notl op1 (if needed) - sarl $31, op1 - andl (cf-ct), op1 - addl ct, op1 - - For x < 0 (resp. x <= -1) there will be no notl, - so if possible swap the constants to get rid of the - complement. - True/false will be -1/0 while code below (store flag - followed by decrement) is 0/-1, so the constants need - to be exchanged once more. */ - - if (compare_code == GE || !cf) - { - code = reverse_condition (code); - compare_code = LT; - } - else - { - HOST_WIDE_INT tmp = cf; - cf = ct; - ct = tmp; - } - - out = emit_store_flag (out, code, ix86_compare_op0, - ix86_compare_op1, VOIDmode, 0, -1); - } - else - { - out = emit_store_flag (out, code, ix86_compare_op0, - ix86_compare_op1, VOIDmode, 0, 1); - - out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, - copy_rtx (out), 1, OPTAB_DIRECT); - } - - out = expand_simple_binop (mode, AND, copy_rtx (out), - gen_int_mode (cf - ct, mode), - copy_rtx (out), 1, OPTAB_DIRECT); - if (ct) - out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), - copy_rtx (out), 1, OPTAB_DIRECT); - if (!rtx_equal_p (out, operands[0])) - emit_move_insn (operands[0], copy_rtx (out)); - - return 1; /* DONE */ - } - } - - if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) - { - /* Try a few things more with specific constants and a variable. */ - - optab op; - rtx var, orig_out, out, tmp; - - if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) - return 0; /* FAIL */ - - /* If one of the two operands is an interesting constant, load a - constant with the above and mask it in with a logical operation. */ - - if (CONST_INT_P (operands[2])) - { - var = operands[3]; - if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) - operands[3] = constm1_rtx, op = and_optab; - else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) - operands[3] = const0_rtx, op = ior_optab; - else - return 0; /* FAIL */ - } - else if (CONST_INT_P (operands[3])) - { - var = operands[2]; - if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) - operands[2] = constm1_rtx, op = and_optab; - else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) - operands[2] = const0_rtx, op = ior_optab; - else - return 0; /* FAIL */ - } - else - return 0; /* FAIL */ - - orig_out = operands[0]; - tmp = gen_reg_rtx (mode); - operands[0] = tmp; - - /* Recurse to get the constant loaded. */ - if (ix86_expand_int_movcc (operands) == 0) - return 0; /* FAIL */ - - /* Mask in the interesting variable. */ - out = expand_binop (mode, op, var, tmp, orig_out, 0, - OPTAB_WIDEN); - if (!rtx_equal_p (out, orig_out)) - emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); - - return 1; /* DONE */ - } - - /* - * For comparison with above, - * - * movl cf,dest - * movl ct,tmp - * cmpl op1,op2 - * cmovcc tmp,dest - * - * Size 15. - */ - - if (! nonimmediate_operand (operands[2], mode)) - operands[2] = force_reg (mode, operands[2]); - if (! nonimmediate_operand (operands[3], mode)) - operands[3] = force_reg (mode, operands[3]); - - if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) - { - rtx tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, operands[3]); - operands[3] = tmp; - } - if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) - { - rtx tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, operands[2]); - operands[2] = tmp; - } - - if (! register_operand (operands[2], VOIDmode) - && (mode == QImode - || ! register_operand (operands[3], VOIDmode))) - operands[2] = force_reg (mode, operands[2]); - - if (mode == QImode - && ! register_operand (operands[3], VOIDmode)) - operands[3] = force_reg (mode, operands[3]); - - emit_insn (compare_seq); - emit_insn (gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_IF_THEN_ELSE (mode, - compare_op, operands[2], - operands[3]))); - if (bypass_test) - emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), - gen_rtx_IF_THEN_ELSE (mode, - bypass_test, - copy_rtx (operands[3]), - copy_rtx (operands[0])))); - if (second_test) - emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), - gen_rtx_IF_THEN_ELSE (mode, - second_test, - copy_rtx (operands[2]), - copy_rtx (operands[0])))); - - return 1; /* DONE */ -} - -/* Swap, force into registers, or otherwise massage the two operands - to an sse comparison with a mask result. Thus we differ a bit from - ix86_prepare_fp_compare_args which expects to produce a flags result. - - The DEST operand exists to help determine whether to commute commutative - operators. The POP0/POP1 operands are updated in place. The new - comparison code is returned, or UNKNOWN if not implementable. */ - -static enum rtx_code -ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, - rtx *pop0, rtx *pop1) -{ - rtx tmp; - - switch (code) - { - case LTGT: - case UNEQ: - /* We have no LTGT as an operator. We could implement it with - NE & ORDERED, but this requires an extra temporary. It's - not clear that it's worth it. */ - return UNKNOWN; - - case LT: - case LE: - case UNGT: - case UNGE: - /* These are supported directly. */ - break; - - case EQ: - case NE: - case UNORDERED: - case ORDERED: - /* For commutative operators, try to canonicalize the destination - operand to be first in the comparison - this helps reload to - avoid extra moves. */ - if (!dest || !rtx_equal_p (dest, *pop1)) - break; - /* FALLTHRU */ - - case GE: - case GT: - case UNLE: - case UNLT: - /* These are not supported directly. Swap the comparison operands - to transform into something that is supported. */ - tmp = *pop0; - *pop0 = *pop1; - *pop1 = tmp; - code = swap_condition (code); - break; - - default: - gcc_unreachable (); - } - - return code; -} - -/* Detect conditional moves that exactly match min/max operational - semantics. Note that this is IEEE safe, as long as we don't - interchange the operands. - - Returns FALSE if this conditional move doesn't match a MIN/MAX, - and TRUE if the operation is successful and instructions are emitted. */ - -static bool -ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, - rtx cmp_op1, rtx if_true, rtx if_false) -{ - enum machine_mode mode; - bool is_min; - rtx tmp; - - if (code == LT) - ; - else if (code == UNGE) - { - tmp = if_true; - if_true = if_false; - if_false = tmp; - } - else - return false; - - if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) - is_min = true; - else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) - is_min = false; - else - return false; - - mode = GET_MODE (dest); - - /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, - but MODE may be a vector mode and thus not appropriate. */ - if (!flag_finite_math_only || !flag_unsafe_math_optimizations) - { - int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; - rtvec v; - - if_true = force_reg (mode, if_true); - v = gen_rtvec (2, if_true, if_false); - tmp = gen_rtx_UNSPEC (mode, v, u); - } - else - { - code = is_min ? SMIN : SMAX; - tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); - } - - emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); - return true; -} - -/* Expand an sse vector comparison. Return the register with the result. */ - -static rtx -ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, - rtx op_true, rtx op_false) -{ - enum machine_mode mode = GET_MODE (dest); - rtx x; - - cmp_op0 = force_reg (mode, cmp_op0); - if (!nonimmediate_operand (cmp_op1, mode)) - cmp_op1 = force_reg (mode, cmp_op1); - - if (optimize - || reg_overlap_mentioned_p (dest, op_true) - || reg_overlap_mentioned_p (dest, op_false)) - dest = gen_reg_rtx (mode); - - x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - - return dest; -} - -/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical - operations. This is used for both scalar and vector conditional moves. */ - -static void -ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) -{ - enum machine_mode mode = GET_MODE (dest); - rtx t2, t3, x; - - if (op_false == CONST0_RTX (mode)) - { - op_true = force_reg (mode, op_true); - x = gen_rtx_AND (mode, cmp, op_true); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - else if (op_true == CONST0_RTX (mode)) - { - op_false = force_reg (mode, op_false); - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, op_false); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } - else if (TARGET_SSE5) - { - rtx pcmov = gen_rtx_SET (mode, dest, - gen_rtx_IF_THEN_ELSE (mode, cmp, - op_true, - op_false)); - emit_insn (pcmov); - } - else - { - op_true = force_reg (mode, op_true); - op_false = force_reg (mode, op_false); - - t2 = gen_reg_rtx (mode); - if (optimize) - t3 = gen_reg_rtx (mode); - else - t3 = dest; - - x = gen_rtx_AND (mode, op_true, cmp); - emit_insn (gen_rtx_SET (VOIDmode, t2, x)); - - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, op_false); - emit_insn (gen_rtx_SET (VOIDmode, t3, x)); - - x = gen_rtx_IOR (mode, t3, t2); - emit_insn (gen_rtx_SET (VOIDmode, dest, x)); - } -} - -/* Expand a floating-point conditional move. Return true if successful. */ - -int -ix86_expand_fp_movcc (rtx operands[]) -{ - enum machine_mode mode = GET_MODE (operands[0]); - enum rtx_code code = GET_CODE (operands[1]); - rtx tmp, compare_op, second_test, bypass_test; - - if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) - { - enum machine_mode cmode; - - /* Since we've no cmove for sse registers, don't force bad register - allocation just to gain access to it. Deny movcc when the - comparison mode doesn't match the move mode. */ - cmode = GET_MODE (ix86_compare_op0); - if (cmode == VOIDmode) - cmode = GET_MODE (ix86_compare_op1); - if (cmode != mode) - return 0; - - code = ix86_prepare_sse_fp_compare_args (operands[0], code, - &ix86_compare_op0, - &ix86_compare_op1); - if (code == UNKNOWN) - return 0; - - if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, - ix86_compare_op1, operands[2], - operands[3])) - return 1; - - tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, - ix86_compare_op1, operands[2], operands[3]); - ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); - return 1; - } - - /* The floating point conditional move instructions don't directly - support conditions resulting from a signed integer comparison. */ - - compare_op = ix86_expand_compare (code, &second_test, &bypass_test); - - /* The floating point conditional move instructions don't directly - support signed integer comparisons. */ - - if (!fcmov_comparison_operator (compare_op, VOIDmode)) - { - gcc_assert (!second_test && !bypass_test); - tmp = gen_reg_rtx (QImode); - ix86_expand_setcc (code, tmp); - code = NE; - ix86_compare_op0 = tmp; - ix86_compare_op1 = const0_rtx; - compare_op = ix86_expand_compare (code, &second_test, &bypass_test); - } - if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) - { - tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, operands[3]); - operands[3] = tmp; - } - if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) - { - tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, operands[2]); - operands[2] = tmp; - } - - emit_insn (gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_IF_THEN_ELSE (mode, compare_op, - operands[2], operands[3]))); - if (bypass_test) - emit_insn (gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_IF_THEN_ELSE (mode, bypass_test, - operands[3], operands[0]))); - if (second_test) - emit_insn (gen_rtx_SET (VOIDmode, operands[0], - gen_rtx_IF_THEN_ELSE (mode, second_test, - operands[2], operands[0]))); - - return 1; -} - -/* Expand a floating-point vector conditional move; a vcond operation - rather than a movcc operation. */ - -bool -ix86_expand_fp_vcond (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[3]); - rtx cmp; - - code = ix86_prepare_sse_fp_compare_args (operands[0], code, - &operands[4], &operands[5]); - if (code == UNKNOWN) - return false; - - if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], - operands[5], operands[1], operands[2])) - return true; - - cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], - operands[1], operands[2]); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); - return true; -} - -/* Expand a signed/unsigned integral vector conditional move. */ - -bool -ix86_expand_int_vcond (rtx operands[]) -{ - enum machine_mode mode = GET_MODE (operands[0]); - enum rtx_code code = GET_CODE (operands[3]); - bool negate = false; - rtx x, cop0, cop1; - - cop0 = operands[4]; - cop1 = operands[5]; - - /* SSE5 supports all of the comparisons on all vector int types. */ - if (!TARGET_SSE5) - { - /* Canonicalize the comparison to EQ, GT, GTU. */ - switch (code) - { - case EQ: - case GT: - case GTU: - break; - - case NE: - case LE: - case LEU: - code = reverse_condition (code); - negate = true; - break; - - case GE: - case GEU: - code = reverse_condition (code); - negate = true; - /* FALLTHRU */ - - case LT: - case LTU: - code = swap_condition (code); - x = cop0, cop0 = cop1, cop1 = x; - break; - - default: - gcc_unreachable (); - } - - /* Only SSE4.1/SSE4.2 supports V2DImode. */ - if (mode == V2DImode) - { - switch (code) - { - case EQ: - /* SSE4.1 supports EQ. */ - if (!TARGET_SSE4_1) - return false; - break; - - case GT: - case GTU: - /* SSE4.2 supports GT/GTU. */ - if (!TARGET_SSE4_2) - return false; - break; - - default: - gcc_unreachable (); - } - } - - /* Unsigned parallel compare is not supported by the hardware. - Play some tricks to turn this into a signed comparison - against 0. */ - if (code == GTU) - { - cop0 = force_reg (mode, cop0); - - switch (mode) - { - case V4SImode: - case V2DImode: - { - rtx t1, t2, mask; - rtx (*gen_sub3) (rtx, rtx, rtx); - - /* Subtract (-(INT MAX) - 1) from both operands to make - them signed. */ - mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), - true, false); - gen_sub3 = (mode == V4SImode - ? gen_subv4si3 : gen_subv2di3); - t1 = gen_reg_rtx (mode); - emit_insn (gen_sub3 (t1, cop0, mask)); - - t2 = gen_reg_rtx (mode); - emit_insn (gen_sub3 (t2, cop1, mask)); - - cop0 = t1; - cop1 = t2; - code = GT; - } - break; - - case V16QImode: - case V8HImode: - /* Perform a parallel unsigned saturating subtraction. */ - x = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, x, - gen_rtx_US_MINUS (mode, cop0, cop1))); - - cop0 = x; - cop1 = CONST0_RTX (mode); - code = EQ; - negate = !negate; - break; - - default: - gcc_unreachable (); - } - } - } - - x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, - operands[1+negate], operands[2-negate]); - - ix86_expand_sse_movcc (operands[0], x, operands[1+negate], - operands[2-negate]); - return true; -} - -/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is - true if we should do zero extension, else sign extension. HIGH_P is - true if we want the N/2 high elements, else the low elements. */ - -void -ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) -{ - enum machine_mode imode = GET_MODE (operands[1]); - rtx (*unpack)(rtx, rtx, rtx); - rtx se, dest; - - switch (imode) - { - case V16QImode: - if (high_p) - unpack = gen_vec_interleave_highv16qi; - else - unpack = gen_vec_interleave_lowv16qi; - break; - case V8HImode: - if (high_p) - unpack = gen_vec_interleave_highv8hi; - else - unpack = gen_vec_interleave_lowv8hi; - break; - case V4SImode: - if (high_p) - unpack = gen_vec_interleave_highv4si; - else - unpack = gen_vec_interleave_lowv4si; - break; - default: - gcc_unreachable (); - } - - dest = gen_lowpart (imode, operands[0]); - - if (unsigned_p) - se = force_reg (imode, CONST0_RTX (imode)); - else - se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), - operands[1], pc_rtx, pc_rtx); - - emit_insn (unpack (dest, operands[1], se)); -} - -/* This function performs the same task as ix86_expand_sse_unpack, - but with SSE4.1 instructions. */ - -void -ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) -{ - enum machine_mode imode = GET_MODE (operands[1]); - rtx (*unpack)(rtx, rtx); - rtx src, dest; - - switch (imode) - { - case V16QImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv8qiv8hi2; - else - unpack = gen_sse4_1_extendv8qiv8hi2; - break; - case V8HImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv4hiv4si2; - else - unpack = gen_sse4_1_extendv4hiv4si2; - break; - case V4SImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv2siv2di2; - else - unpack = gen_sse4_1_extendv2siv2di2; - break; - default: - gcc_unreachable (); - } - - dest = operands[0]; - if (high_p) - { - /* Shift higher 8 bytes to lower 8 bytes. */ - src = gen_reg_rtx (imode); - emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), - gen_lowpart (TImode, operands[1]), - GEN_INT (64))); - } - else - src = operands[1]; - - emit_insn (unpack (dest, src)); -} - -/* This function performs the same task as ix86_expand_sse_unpack, - but with sse5 instructions. */ - -void -ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p) -{ - enum machine_mode imode = GET_MODE (operands[1]); - int pperm_bytes[16]; - int i; - int h = (high_p) ? 8 : 0; - int h2; - int sign_extend; - rtvec v = rtvec_alloc (16); - rtvec vs; - rtx x, p; - rtx op0 = operands[0], op1 = operands[1]; - - switch (imode) - { - case V16QImode: - vs = rtvec_alloc (8); - h2 = (high_p) ? 8 : 0; - for (i = 0; i < 8; i++) - { - pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h; - pperm_bytes[2*i+1] = ((unsigned_p) - ? PPERM_ZERO - : PPERM_SIGN | PPERM_SRC2 | i | h); - } - - for (i = 0; i < 16; i++) - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); - - for (i = 0; i < 8; i++) - RTVEC_ELT (vs, i) = GEN_INT (i + h2); - - p = gen_rtx_PARALLEL (VOIDmode, vs); - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); - if (unsigned_p) - emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x)); - else - emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x)); - break; - - case V8HImode: - vs = rtvec_alloc (4); - h2 = (high_p) ? 4 : 0; - for (i = 0; i < 4; i++) - { - sign_extend = ((unsigned_p) - ? PPERM_ZERO - : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h)); - pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h); - pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h); - pperm_bytes[4*i+2] = sign_extend; - pperm_bytes[4*i+3] = sign_extend; - } - - for (i = 0; i < 16; i++) - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); - - for (i = 0; i < 4; i++) - RTVEC_ELT (vs, i) = GEN_INT (i + h2); - - p = gen_rtx_PARALLEL (VOIDmode, vs); - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); - if (unsigned_p) - emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x)); - else - emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x)); - break; - - case V4SImode: - vs = rtvec_alloc (2); - h2 = (high_p) ? 2 : 0; - for (i = 0; i < 2; i++) - { - sign_extend = ((unsigned_p) - ? PPERM_ZERO - : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h)); - pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h); - pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h); - pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h); - pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h); - pperm_bytes[8*i+4] = sign_extend; - pperm_bytes[8*i+5] = sign_extend; - pperm_bytes[8*i+6] = sign_extend; - pperm_bytes[8*i+7] = sign_extend; - } - - for (i = 0; i < 16; i++) - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); - - for (i = 0; i < 2; i++) - RTVEC_ELT (vs, i) = GEN_INT (i + h2); - - p = gen_rtx_PARALLEL (VOIDmode, vs); - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); - if (unsigned_p) - emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x)); - else - emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x)); - break; - - default: - gcc_unreachable (); - } - - return; -} - -/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the - next narrower integer vector type */ -void -ix86_expand_sse5_pack (rtx operands[3]) -{ - enum machine_mode imode = GET_MODE (operands[0]); - int pperm_bytes[16]; - int i; - rtvec v = rtvec_alloc (16); - rtx x; - rtx op0 = operands[0]; - rtx op1 = operands[1]; - rtx op2 = operands[2]; - - switch (imode) - { - case V16QImode: - for (i = 0; i < 8; i++) - { - pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2); - pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2); - } - - for (i = 0; i < 16; i++) - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); - - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); - emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x)); - break; - - case V8HImode: - for (i = 0; i < 4; i++) - { - pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0); - pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1); - pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0); - pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1); - } - - for (i = 0; i < 16; i++) - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); - - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); - emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x)); - break; - - case V4SImode: - for (i = 0; i < 2; i++) - { - pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0); - pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1); - pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2); - pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3); - pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0); - pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1); - pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2); - pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3); - } - - for (i = 0; i < 16; i++) - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); - - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); - emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x)); - break; - - default: - gcc_unreachable (); - } - - return; -} - -/* Expand conditional increment or decrement using adb/sbb instructions. - The default case using setcc followed by the conditional move can be - done by generic code. */ -int -ix86_expand_int_addcc (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[1]); - rtx compare_op; - rtx val = const0_rtx; - bool fpcmp = false; - enum machine_mode mode = GET_MODE (operands[0]); - - if (operands[3] != const1_rtx - && operands[3] != constm1_rtx) - return 0; - if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0, - ix86_compare_op1, &compare_op)) - return 0; - code = GET_CODE (compare_op); - - if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode - || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) - { - fpcmp = true; - code = ix86_fp_compare_code_to_integer (code); - } - - if (code != LTU) - { - val = constm1_rtx; - if (fpcmp) - PUT_CODE (compare_op, - reverse_condition_maybe_unordered - (GET_CODE (compare_op))); - else - PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); - } - PUT_MODE (compare_op, mode); - - /* Construct either adc or sbb insn. */ - if ((code == LTU) == (operands[3] == constm1_rtx)) - { - switch (GET_MODE (operands[0])) - { - case QImode: - emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); - break; - case HImode: - emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); - break; - case SImode: - emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); - break; - case DImode: - emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); - break; - default: - gcc_unreachable (); - } - } - else - { - switch (GET_MODE (operands[0])) - { - case QImode: - emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); - break; - case HImode: - emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); - break; - case SImode: - emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); - break; - case DImode: - emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); - break; - default: - gcc_unreachable (); - } - } - return 1; /* DONE */ -} - - -/* Split operands 0 and 1 into SImode parts. Similar to split_di, but - works for floating pointer parameters and nonoffsetable memories. - For pushes, it returns just stack offsets; the values will be saved - in the right order. Maximally three parts are generated. */ - -static int -ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) -{ - int size; - - if (!TARGET_64BIT) - size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; - else - size = (GET_MODE_SIZE (mode) + 4) / 8; - - gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); - gcc_assert (size >= 2 && size <= 4); - - /* Optimize constant pool reference to immediates. This is used by fp - moves, that force all constants to memory to allow combining. */ - if (MEM_P (operand) && MEM_READONLY_P (operand)) - { - rtx tmp = maybe_get_pool_constant (operand); - if (tmp) - operand = tmp; - } - - if (MEM_P (operand) && !offsettable_memref_p (operand)) - { - /* The only non-offsetable memories we handle are pushes. */ - int ok = push_operand (operand, VOIDmode); - - gcc_assert (ok); - - operand = copy_rtx (operand); - PUT_MODE (operand, Pmode); - parts[0] = parts[1] = parts[2] = parts[3] = operand; - return size; - } - - if (GET_CODE (operand) == CONST_VECTOR) - { - enum machine_mode imode = int_mode_for_mode (mode); - /* Caution: if we looked through a constant pool memory above, - the operand may actually have a different mode now. That's - ok, since we want to pun this all the way back to an integer. */ - operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); - gcc_assert (operand != NULL); - mode = imode; - } - - if (!TARGET_64BIT) - { - if (mode == DImode) - split_di (&operand, 1, &parts[0], &parts[1]); - else - { - int i; - - if (REG_P (operand)) - { - gcc_assert (reload_completed); - for (i = 0; i < size; i++) - parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); - } - else if (offsettable_memref_p (operand)) - { - operand = adjust_address (operand, SImode, 0); - parts[0] = operand; - for (i = 1; i < size; i++) - parts[i] = adjust_address (operand, SImode, 4 * i); - } - else if (GET_CODE (operand) == CONST_DOUBLE) - { - REAL_VALUE_TYPE r; - long l[4]; - - REAL_VALUE_FROM_CONST_DOUBLE (r, operand); - switch (mode) - { - case TFmode: - real_to_target (l, &r, mode); - parts[3] = gen_int_mode (l[3], SImode); - parts[2] = gen_int_mode (l[2], SImode); - break; - case XFmode: - REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); - parts[2] = gen_int_mode (l[2], SImode); - break; - case DFmode: - REAL_VALUE_TO_TARGET_DOUBLE (r, l); - break; - default: - gcc_unreachable (); - } - parts[1] = gen_int_mode (l[1], SImode); - parts[0] = gen_int_mode (l[0], SImode); - } - else - gcc_unreachable (); - } - } - else - { - if (mode == TImode) - split_ti (&operand, 1, &parts[0], &parts[1]); - if (mode == XFmode || mode == TFmode) - { - enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; - if (REG_P (operand)) - { - gcc_assert (reload_completed); - parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); - parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); - } - else if (offsettable_memref_p (operand)) - { - operand = adjust_address (operand, DImode, 0); - parts[0] = operand; - parts[1] = adjust_address (operand, upper_mode, 8); - } - else if (GET_CODE (operand) == CONST_DOUBLE) - { - REAL_VALUE_TYPE r; - long l[4]; - - REAL_VALUE_FROM_CONST_DOUBLE (r, operand); - real_to_target (l, &r, mode); - - /* Do not use shift by 32 to avoid warning on 32bit systems. */ - if (HOST_BITS_PER_WIDE_INT >= 64) - parts[0] - = gen_int_mode - ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) - + ((((HOST_WIDE_INT) l[1]) << 31) << 1), - DImode); - else - parts[0] = immed_double_const (l[0], l[1], DImode); - - if (upper_mode == SImode) - parts[1] = gen_int_mode (l[2], SImode); - else if (HOST_BITS_PER_WIDE_INT >= 64) - parts[1] - = gen_int_mode - ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1)) - + ((((HOST_WIDE_INT) l[3]) << 31) << 1), - DImode); - else - parts[1] = immed_double_const (l[2], l[3], DImode); - } - else - gcc_unreachable (); - } - } - - return size; -} - -/* Emit insns to perform a move or push of DI, DF, XF, and TF values. - Return false when normal moves are needed; true when all required - insns have been emitted. Operands 2-4 contain the input values - int the correct order; operands 5-7 contain the output values. */ - -void -ix86_split_long_move (rtx operands[]) -{ - rtx part[2][4]; - int nparts, i, j; - int push = 0; - int collisions = 0; - enum machine_mode mode = GET_MODE (operands[0]); - bool collisionparts[4]; - - /* The DFmode expanders may ask us to move double. - For 64bit target this is single move. By hiding the fact - here we simplify i386.md splitters. */ - if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) - { - /* Optimize constant pool reference to immediates. This is used by - fp moves, that force all constants to memory to allow combining. */ - - if (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) - operands[1] = get_pool_constant (XEXP (operands[1], 0)); - if (push_operand (operands[0], VOIDmode)) - { - operands[0] = copy_rtx (operands[0]); - PUT_MODE (operands[0], Pmode); - } - else - operands[0] = gen_lowpart (DImode, operands[0]); - operands[1] = gen_lowpart (DImode, operands[1]); - emit_move_insn (operands[0], operands[1]); - return; - } - - /* The only non-offsettable memory we handle is push. */ - if (push_operand (operands[0], VOIDmode)) - push = 1; - else - gcc_assert (!MEM_P (operands[0]) - || offsettable_memref_p (operands[0])); - - nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); - ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); - - /* When emitting push, take care for source operands on the stack. */ - if (push && MEM_P (operands[1]) - && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) - { - rtx src_base = XEXP (part[1][nparts - 1], 0); - - /* Compensate for the stack decrement by 4. */ - if (!TARGET_64BIT && nparts == 3 - && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) - src_base = plus_constant (src_base, 4); - - /* src_base refers to the stack pointer and is - automatically decreased by emitted push. */ - for (i = 0; i < nparts; i++) - part[1][i] = change_address (part[1][i], - GET_MODE (part[1][i]), src_base); - } - - /* We need to do copy in the right order in case an address register - of the source overlaps the destination. */ - if (REG_P (part[0][0]) && MEM_P (part[1][0])) - { - rtx tmp; - - for (i = 0; i < nparts; i++) - { - collisionparts[i] - = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); - if (collisionparts[i]) - collisions++; - } - - /* Collision in the middle part can be handled by reordering. */ - if (collisions == 1 && nparts == 3 && collisionparts [1]) - { - tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; - tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; - } - else if (collisions == 1 - && nparts == 4 - && (collisionparts [1] || collisionparts [2])) - { - if (collisionparts [1]) - { - tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; - tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; - } - else - { - tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp; - tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp; - } - } - - /* If there are more collisions, we can't handle it by reordering. - Do an lea to the last part and use only one colliding move. */ - else if (collisions > 1) - { - rtx base; - - collisions = 1; - - base = part[0][nparts - 1]; - - /* Handle the case when the last part isn't valid for lea. - Happens in 64-bit mode storing the 12-byte XFmode. */ - if (GET_MODE (base) != Pmode) - base = gen_rtx_REG (Pmode, REGNO (base)); - - emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); - part[1][0] = replace_equiv_address (part[1][0], base); - for (i = 1; i < nparts; i++) - { - tmp = plus_constant (base, UNITS_PER_WORD * i); - part[1][i] = replace_equiv_address (part[1][i], tmp); - } - } - } - - if (push) - { - if (!TARGET_64BIT) - { - if (nparts == 3) - { - if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, GEN_INT (-4))); - emit_move_insn (part[0][2], part[1][2]); - } - else if (nparts == 4) - { - emit_move_insn (part[0][3], part[1][3]); - emit_move_insn (part[0][2], part[1][2]); - } - } - else - { - /* In 64bit mode we don't have 32bit push available. In case this is - register, it is OK - we will just use larger counterpart. We also - retype memory - these comes from attempt to avoid REX prefix on - moving of second half of TFmode value. */ - if (GET_MODE (part[1][1]) == SImode) - { - switch (GET_CODE (part[1][1])) - { - case MEM: - part[1][1] = adjust_address (part[1][1], DImode, 0); - break; - - case REG: - part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); - break; - - default: - gcc_unreachable (); - } - - if (GET_MODE (part[1][0]) == SImode) - part[1][0] = part[1][1]; - } - } - emit_move_insn (part[0][1], part[1][1]); - emit_move_insn (part[0][0], part[1][0]); - return; - } - - /* Choose correct order to not overwrite the source before it is copied. */ - if ((REG_P (part[0][0]) - && REG_P (part[1][1]) - && (REGNO (part[0][0]) == REGNO (part[1][1]) - || (nparts == 3 - && REGNO (part[0][0]) == REGNO (part[1][2])) - || (nparts == 4 - && REGNO (part[0][0]) == REGNO (part[1][3])))) - || (collisions > 0 - && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) - { - for (i = 0, j = nparts - 1; i < nparts; i++, j--) - { - operands[2 + i] = part[0][j]; - operands[6 + i] = part[1][j]; - } - } - else - { - for (i = 0; i < nparts; i++) - { - operands[2 + i] = part[0][i]; - operands[6 + i] = part[1][i]; - } - } - - /* If optimizing for size, attempt to locally unCSE nonzero constants. */ - if (optimize_insn_for_size_p ()) - { - for (j = 0; j < nparts - 1; j++) - if (CONST_INT_P (operands[6 + j]) - && operands[6 + j] != const0_rtx - && REG_P (operands[2 + j])) - for (i = j; i < nparts - 1; i++) - if (CONST_INT_P (operands[7 + i]) - && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) - operands[7 + i] = operands[2 + j]; - } - - for (i = 0; i < nparts; i++) - emit_move_insn (operands[2 + i], operands[6 + i]); - - return; -} - -/* Helper function of ix86_split_ashl used to generate an SImode/DImode - left shift by a constant, either using a single shift or - a sequence of add instructions. */ - -static void -ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) -{ - if (count == 1) - { - emit_insn ((mode == DImode - ? gen_addsi3 - : gen_adddi3) (operand, operand, operand)); - } - else if (!optimize_insn_for_size_p () - && count * ix86_cost->add <= ix86_cost->shift_const) - { - int i; - for (i=0; i<count; i++) - { - emit_insn ((mode == DImode - ? gen_addsi3 - : gen_adddi3) (operand, operand, operand)); - } - } - else - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (operand, operand, GEN_INT (count))); -} - -void -ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) -{ - rtx low[2], high[2]; - int count; - const int single_width = mode == DImode ? 32 : 64; - - if (CONST_INT_P (operands[2])) - { - (mode == DImode ? split_di : split_ti) (operands, 2, low, high); - count = INTVAL (operands[2]) & (single_width * 2 - 1); - - if (count >= single_width) - { - emit_move_insn (high[0], low[1]); - emit_move_insn (low[0], const0_rtx); - - if (count > single_width) - ix86_expand_ashl_const (high[0], count - single_width, mode); - } - else - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - emit_insn ((mode == DImode - ? gen_x86_shld - : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); - ix86_expand_ashl_const (low[0], count, mode); - } - return; - } - - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); - - if (operands[1] == const1_rtx) - { - /* Assuming we've chosen a QImode capable registers, then 1 << N - can be done with two 32/64-bit shifts, no branches, no cmoves. */ - if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) - { - rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); - - ix86_expand_clear (low[0]); - ix86_expand_clear (high[0]); - emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); - - d = gen_lowpart (QImode, low[0]); - d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); - s = gen_rtx_EQ (QImode, flags, const0_rtx); - emit_insn (gen_rtx_SET (VOIDmode, d, s)); - - d = gen_lowpart (QImode, high[0]); - d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); - s = gen_rtx_NE (QImode, flags, const0_rtx); - emit_insn (gen_rtx_SET (VOIDmode, d, s)); - } - - /* Otherwise, we can get the same results by manually performing - a bit extract operation on bit 5/6, and then performing the two - shifts. The two methods of getting 0/1 into low/high are exactly - the same size. Avoiding the shift in the bit extract case helps - pentium4 a bit; no one else seems to care much either way. */ - else - { - rtx x; - - if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) - x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); - else - x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); - emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); - - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); - emit_insn ((mode == DImode - ? gen_andsi3 - : gen_anddi3) (high[0], high[0], GEN_INT (1))); - emit_move_insn (low[0], high[0]); - emit_insn ((mode == DImode - ? gen_xorsi3 - : gen_xordi3) (low[0], low[0], GEN_INT (1))); - } - - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (low[0], low[0], operands[2])); - emit_insn ((mode == DImode - ? gen_ashlsi3 - : gen_ashldi3) (high[0], high[0], operands[2])); - return; - } - - if (operands[1] == constm1_rtx) - { - /* For -1 << N, we can avoid the shld instruction, because we - know that we're shifting 0...31/63 ones into a -1. */ - emit_move_insn (low[0], constm1_rtx); - if (optimize_insn_for_size_p ()) - emit_move_insn (high[0], low[0]); - else - emit_move_insn (high[0], constm1_rtx); - } - else - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); - emit_insn ((mode == DImode - ? gen_x86_shld - : gen_x86_64_shld) (high[0], low[0], operands[2])); - } - - emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); - - if (TARGET_CMOVE && scratch) - { - ix86_expand_clear (scratch); - emit_insn ((mode == DImode - ? gen_x86_shift_adj_1 - : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2], - scratch)); - } - else - emit_insn ((mode == DImode - ? gen_x86_shift_adj_2 - : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2])); -} - -void -ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) -{ - rtx low[2], high[2]; - int count; - const int single_width = mode == DImode ? 32 : 64; - - if (CONST_INT_P (operands[2])) - { - (mode == DImode ? split_di : split_ti) (operands, 2, low, high); - count = INTVAL (operands[2]) & (single_width * 2 - 1); - - if (count == single_width * 2 - 1) - { - emit_move_insn (high[0], high[1]); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], - GEN_INT (single_width - 1))); - emit_move_insn (low[0], high[0]); - - } - else if (count >= single_width) - { - emit_move_insn (low[0], high[1]); - emit_move_insn (high[0], low[0]); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], - GEN_INT (single_width - 1))); - if (count > single_width) - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (low[0], low[0], - GEN_INT (count - single_width))); - } - else - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); - } - } - else - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); - - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], operands[2])); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (high[0], high[0], operands[2])); - - if (TARGET_CMOVE && scratch) - { - emit_move_insn (scratch, high[0]); - emit_insn ((mode == DImode - ? gen_ashrsi3 - : gen_ashrdi3) (scratch, scratch, - GEN_INT (single_width - 1))); - emit_insn ((mode == DImode - ? gen_x86_shift_adj_1 - : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], - scratch)); - } - else - emit_insn ((mode == DImode - ? gen_x86_shift_adj_3 - : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2])); - } -} - -void -ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) -{ - rtx low[2], high[2]; - int count; - const int single_width = mode == DImode ? 32 : 64; - - if (CONST_INT_P (operands[2])) - { - (mode == DImode ? split_di : split_ti) (operands, 2, low, high); - count = INTVAL (operands[2]) & (single_width * 2 - 1); - - if (count >= single_width) - { - emit_move_insn (low[0], high[1]); - ix86_expand_clear (high[0]); - - if (count > single_width) - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (low[0], low[0], - GEN_INT (count - single_width))); - } - else - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); - } - } - else - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - (mode == DImode ? split_di : split_ti) (operands, 1, low, high); - - emit_insn ((mode == DImode - ? gen_x86_shrd - : gen_x86_64_shrd) (low[0], high[0], operands[2])); - emit_insn ((mode == DImode - ? gen_lshrsi3 - : gen_lshrdi3) (high[0], high[0], operands[2])); - - /* Heh. By reversing the arguments, we can reuse this pattern. */ - if (TARGET_CMOVE && scratch) - { - ix86_expand_clear (scratch); - emit_insn ((mode == DImode - ? gen_x86_shift_adj_1 - : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], - scratch)); - } - else - emit_insn ((mode == DImode - ? gen_x86_shift_adj_2 - : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2])); - } -} - -/* Predict just emitted jump instruction to be taken with probability PROB. */ -static void -predict_jump (int prob) -{ - rtx insn = get_last_insn (); - gcc_assert (JUMP_P (insn)); - REG_NOTES (insn) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (prob), - REG_NOTES (insn)); -} - -/* Helper function for the string operations below. Dest VARIABLE whether - it is aligned to VALUE bytes. If true, jump to the label. */ -static rtx -ix86_expand_aligntest (rtx variable, int value, bool epilogue) -{ - rtx label = gen_label_rtx (); - rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); - if (GET_MODE (variable) == DImode) - emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); - else - emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); - emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), - 1, label); - if (epilogue) - predict_jump (REG_BR_PROB_BASE * 50 / 100); - else - predict_jump (REG_BR_PROB_BASE * 90 / 100); - return label; -} - -/* Adjust COUNTER by the VALUE. */ -static void -ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) -{ - if (GET_MODE (countreg) == DImode) - emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); - else - emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); -} - -/* Zero extend possibly SImode EXP to Pmode register. */ -rtx -ix86_zero_extend_to_Pmode (rtx exp) -{ - rtx r; - if (GET_MODE (exp) == VOIDmode) - return force_reg (Pmode, exp); - if (GET_MODE (exp) == Pmode) - return copy_to_mode_reg (Pmode, exp); - r = gen_reg_rtx (Pmode); - emit_insn (gen_zero_extendsidi2 (r, exp)); - return r; -} - -/* Divide COUNTREG by SCALE. */ -static rtx -scale_counter (rtx countreg, int scale) -{ - rtx sc; - rtx piece_size_mask; - - if (scale == 1) - return countreg; - if (CONST_INT_P (countreg)) - return GEN_INT (INTVAL (countreg) / scale); - gcc_assert (REG_P (countreg)); - - piece_size_mask = GEN_INT (scale - 1); - sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, - GEN_INT (exact_log2 (scale)), - NULL, 1, OPTAB_DIRECT); - return sc; -} - -/* Return mode for the memcpy/memset loop counter. Prefer SImode over - DImode for constant loop counts. */ - -static enum machine_mode -counter_mode (rtx count_exp) -{ - if (GET_MODE (count_exp) != VOIDmode) - return GET_MODE (count_exp); - if (GET_CODE (count_exp) != CONST_INT) - return Pmode; - if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) - return DImode; - return SImode; -} - -/* When SRCPTR is non-NULL, output simple loop to move memory - pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, - overall size is COUNT specified in bytes. When SRCPTR is NULL, output the - equivalent loop to set memory by VALUE (supposed to be in MODE). - - The size is rounded down to whole number of chunk size moved at once. - SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */ - - -static void -expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx value, - rtx count, enum machine_mode mode, int unroll, - int expected_size) -{ - rtx out_label, top_label, iter, tmp; - enum machine_mode iter_mode = counter_mode (count); - rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll); - rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); - rtx size; - rtx x_addr; - rtx y_addr; - int i; - - top_label = gen_label_rtx (); - out_label = gen_label_rtx (); - iter = gen_reg_rtx (iter_mode); - - size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, - NULL, 1, OPTAB_DIRECT); - /* Those two should combine. */ - if (piece_size == const1_rtx) - { - emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, - true, out_label); - predict_jump (REG_BR_PROB_BASE * 10 / 100); - } - emit_move_insn (iter, const0_rtx); - - emit_label (top_label); - - tmp = convert_modes (Pmode, iter_mode, iter, true); - x_addr = gen_rtx_PLUS (Pmode, destptr, tmp); - destmem = change_address (destmem, mode, x_addr); - - if (srcmem) - { - y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp)); - srcmem = change_address (srcmem, mode, y_addr); - - /* When unrolling for chips that reorder memory reads and writes, - we can save registers by using single temporary. - Also using 4 temporaries is overkill in 32bit mode. */ - if (!TARGET_64BIT && 0) - { - for (i = 0; i < unroll; i++) - { - if (i) - { - destmem = - adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); - srcmem = - adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); - } - emit_move_insn (destmem, srcmem); - } - } - else - { - rtx tmpreg[4]; - gcc_assert (unroll <= 4); - for (i = 0; i < unroll; i++) - { - tmpreg[i] = gen_reg_rtx (mode); - if (i) - { - srcmem = - adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); - } - emit_move_insn (tmpreg[i], srcmem); - } - for (i = 0; i < unroll; i++) - { - if (i) - { - destmem = - adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); - } - emit_move_insn (destmem, tmpreg[i]); - } - } - } - else - for (i = 0; i < unroll; i++) - { - if (i) - destmem = - adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); - emit_move_insn (destmem, value); - } - - tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, - true, OPTAB_LIB_WIDEN); - if (tmp != iter) - emit_move_insn (iter, tmp); - - emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, - true, top_label); - if (expected_size != -1) - { - expected_size /= GET_MODE_SIZE (mode) * unroll; - if (expected_size == 0) - predict_jump (0); - else if (expected_size > REG_BR_PROB_BASE) - predict_jump (REG_BR_PROB_BASE - 1); - else - predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size); - } - else - predict_jump (REG_BR_PROB_BASE * 80 / 100); - iter = ix86_zero_extend_to_Pmode (iter); - tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, - true, OPTAB_LIB_WIDEN); - if (tmp != destptr) - emit_move_insn (destptr, tmp); - if (srcptr) - { - tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, - true, OPTAB_LIB_WIDEN); - if (tmp != srcptr) - emit_move_insn (srcptr, tmp); - } - emit_label (out_label); -} - -/* Output "rep; mov" instruction. - Arguments have same meaning as for previous function */ -static void -expand_movmem_via_rep_mov (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, - rtx count, - enum machine_mode mode) -{ - rtx destexp; - rtx srcexp; - rtx countreg; - - /* If the size is known, it is shorter to use rep movs. */ - if (mode == QImode && CONST_INT_P (count) - && !(INTVAL (count) & 3)) - mode = SImode; - - if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) - destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); - if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) - srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); - countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode))); - if (mode != QImode) - { - destexp = gen_rtx_ASHIFT (Pmode, countreg, - GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); - destexp = gen_rtx_PLUS (Pmode, destexp, destptr); - srcexp = gen_rtx_ASHIFT (Pmode, countreg, - GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); - srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); - } - else - { - destexp = gen_rtx_PLUS (Pmode, destptr, countreg); - srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); - } - if (CONST_INT_P (count)) - { - count = GEN_INT (INTVAL (count) - & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); - destmem = shallow_copy_rtx (destmem); - srcmem = shallow_copy_rtx (srcmem); - set_mem_size (destmem, count); - set_mem_size (srcmem, count); - } - else - { - if (MEM_SIZE (destmem)) - set_mem_size (destmem, NULL_RTX); - if (MEM_SIZE (srcmem)) - set_mem_size (srcmem, NULL_RTX); - } - emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, - destexp, srcexp)); -} - -/* Output "rep; stos" instruction. - Arguments have same meaning as for previous function */ -static void -expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value, - rtx count, enum machine_mode mode, - rtx orig_value) -{ - rtx destexp; - rtx countreg; - - if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) - destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); - value = force_reg (mode, gen_lowpart (mode, value)); - countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode))); - if (mode != QImode) - { - destexp = gen_rtx_ASHIFT (Pmode, countreg, - GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); - destexp = gen_rtx_PLUS (Pmode, destexp, destptr); - } - else - destexp = gen_rtx_PLUS (Pmode, destptr, countreg); - if (orig_value == const0_rtx && CONST_INT_P (count)) - { - count = GEN_INT (INTVAL (count) - & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); - destmem = shallow_copy_rtx (destmem); - set_mem_size (destmem, count); - } - else if (MEM_SIZE (destmem)) - set_mem_size (destmem, NULL_RTX); - emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); -} - -static void -emit_strmov (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, enum machine_mode mode, int offset) -{ - rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset); - rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); -} - -/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ -static void -expand_movmem_epilogue (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx count, int max_size) -{ - rtx src, dest; - if (CONST_INT_P (count)) - { - HOST_WIDE_INT countval = INTVAL (count); - int offset = 0; - - if ((countval & 0x10) && max_size > 16) - { - if (TARGET_64BIT) - { - emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); - emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8); - } - else - gcc_unreachable (); - offset += 16; - } - if ((countval & 0x08) && max_size > 8) - { - if (TARGET_64BIT) - emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); - else - { - emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); - emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4); - } - offset += 8; - } - if ((countval & 0x04) && max_size > 4) - { - emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); - offset += 4; - } - if ((countval & 0x02) && max_size > 2) - { - emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset); - offset += 2; - } - if ((countval & 0x01) && max_size > 1) - { - emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset); - offset += 1; - } - return; - } - if (max_size > 8) - { - count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), - count, 1, OPTAB_DIRECT); - expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL, - count, QImode, 1, 4); - return; - } - - /* When there are stringops, we can cheaply increase dest and src pointers. - Otherwise we save code size by maintaining offset (zero is readily - available from preceding rep operation) and using x86 addressing modes. - */ - if (TARGET_SINGLE_STRINGOP) - { - if (max_size > 4) - { - rtx label = ix86_expand_aligntest (count, 4, true); - src = change_address (srcmem, SImode, srcptr); - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 2) - { - rtx label = ix86_expand_aligntest (count, 2, true); - src = change_address (srcmem, HImode, srcptr); - dest = change_address (destmem, HImode, destptr); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 1) - { - rtx label = ix86_expand_aligntest (count, 1, true); - src = change_address (srcmem, QImode, srcptr); - dest = change_address (destmem, QImode, destptr); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); - emit_label (label); - LABEL_NUSES (label) = 1; - } - } - else - { - rtx offset = force_reg (Pmode, const0_rtx); - rtx tmp; - - if (max_size > 4) - { - rtx label = ix86_expand_aligntest (count, 4, true); - src = change_address (srcmem, SImode, srcptr); - dest = change_address (destmem, SImode, destptr); - emit_move_insn (dest, src); - tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, - true, OPTAB_LIB_WIDEN); - if (tmp != offset) - emit_move_insn (offset, tmp); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 2) - { - rtx label = ix86_expand_aligntest (count, 2, true); - tmp = gen_rtx_PLUS (Pmode, srcptr, offset); - src = change_address (srcmem, HImode, tmp); - tmp = gen_rtx_PLUS (Pmode, destptr, offset); - dest = change_address (destmem, HImode, tmp); - emit_move_insn (dest, src); - tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, - true, OPTAB_LIB_WIDEN); - if (tmp != offset) - emit_move_insn (offset, tmp); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 1) - { - rtx label = ix86_expand_aligntest (count, 1, true); - tmp = gen_rtx_PLUS (Pmode, srcptr, offset); - src = change_address (srcmem, QImode, tmp); - tmp = gen_rtx_PLUS (Pmode, destptr, offset); - dest = change_address (destmem, QImode, tmp); - emit_move_insn (dest, src); - emit_label (label); - LABEL_NUSES (label) = 1; - } - } -} - -/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ -static void -expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, - rtx count, int max_size) -{ - count = - expand_simple_binop (counter_mode (count), AND, count, - GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); - expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL, - gen_lowpart (QImode, value), count, QImode, - 1, max_size / 2); -} - -/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ -static void -expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size) -{ - rtx dest; - - if (CONST_INT_P (count)) - { - HOST_WIDE_INT countval = INTVAL (count); - int offset = 0; - - if ((countval & 0x10) && max_size > 16) - { - if (TARGET_64BIT) - { - dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8); - emit_insn (gen_strset (destptr, dest, value)); - } - else - gcc_unreachable (); - offset += 16; - } - if ((countval & 0x08) && max_size > 8) - { - if (TARGET_64BIT) - { - dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset); - emit_insn (gen_strset (destptr, dest, value)); - } - else - { - dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4); - emit_insn (gen_strset (destptr, dest, value)); - } - offset += 8; - } - if ((countval & 0x04) && max_size > 4) - { - dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset); - emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); - offset += 4; - } - if ((countval & 0x02) && max_size > 2) - { - dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset); - emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); - offset += 2; - } - if ((countval & 0x01) && max_size > 1) - { - dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset); - emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); - offset += 1; - } - return; - } - if (max_size > 32) - { - expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); - return; - } - if (max_size > 16) - { - rtx label = ix86_expand_aligntest (count, 16, true); - if (TARGET_64BIT) - { - dest = change_address (destmem, DImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - emit_insn (gen_strset (destptr, dest, value)); - } - else - { - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - emit_insn (gen_strset (destptr, dest, value)); - emit_insn (gen_strset (destptr, dest, value)); - emit_insn (gen_strset (destptr, dest, value)); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 8) - { - rtx label = ix86_expand_aligntest (count, 8, true); - if (TARGET_64BIT) - { - dest = change_address (destmem, DImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - } - else - { - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - emit_insn (gen_strset (destptr, dest, value)); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 4) - { - rtx label = ix86_expand_aligntest (count, 4, true); - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 2) - { - rtx label = ix86_expand_aligntest (count, 2, true); - dest = change_address (destmem, HImode, destptr); - emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 1) - { - rtx label = ix86_expand_aligntest (count, 1, true); - dest = change_address (destmem, QImode, destptr); - emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); - emit_label (label); - LABEL_NUSES (label) = 1; - } -} - -/* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to - DESIRED_ALIGNMENT. */ -static void -expand_movmem_prologue (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx count, - int align, int desired_alignment) -{ - if (align <= 1 && desired_alignment > 1) - { - rtx label = ix86_expand_aligntest (destptr, 1, false); - srcmem = change_address (srcmem, QImode, srcptr); - destmem = change_address (destmem, QImode, destptr); - emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem)); - ix86_adjust_counter (count, 1); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (align <= 2 && desired_alignment > 2) - { - rtx label = ix86_expand_aligntest (destptr, 2, false); - srcmem = change_address (srcmem, HImode, srcptr); - destmem = change_address (destmem, HImode, destptr); - emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem)); - ix86_adjust_counter (count, 2); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (align <= 4 && desired_alignment > 4) - { - rtx label = ix86_expand_aligntest (destptr, 4, false); - srcmem = change_address (srcmem, SImode, srcptr); - destmem = change_address (destmem, SImode, destptr); - emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem)); - ix86_adjust_counter (count, 4); - emit_label (label); - LABEL_NUSES (label) = 1; - } - gcc_assert (desired_alignment <= 8); -} - -/* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN. - ALIGN_BYTES is how many bytes need to be copied. */ -static rtx -expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg, - int desired_align, int align_bytes) -{ - rtx src = *srcp; - rtx src_size, dst_size; - int off = 0; - int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT); - if (src_align_bytes >= 0) - src_align_bytes = desired_align - src_align_bytes; - src_size = MEM_SIZE (src); - dst_size = MEM_SIZE (dst); - if (align_bytes & 1) - { - dst = adjust_automodify_address_nv (dst, QImode, destreg, 0); - src = adjust_automodify_address_nv (src, QImode, srcreg, 0); - off = 1; - emit_insn (gen_strmov (destreg, dst, srcreg, src)); - } - if (align_bytes & 2) - { - dst = adjust_automodify_address_nv (dst, HImode, destreg, off); - src = adjust_automodify_address_nv (src, HImode, srcreg, off); - if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT) - set_mem_align (dst, 2 * BITS_PER_UNIT); - if (src_align_bytes >= 0 - && (src_align_bytes & 1) == (align_bytes & 1) - && MEM_ALIGN (src) < 2 * BITS_PER_UNIT) - set_mem_align (src, 2 * BITS_PER_UNIT); - off = 2; - emit_insn (gen_strmov (destreg, dst, srcreg, src)); - } - if (align_bytes & 4) - { - dst = adjust_automodify_address_nv (dst, SImode, destreg, off); - src = adjust_automodify_address_nv (src, SImode, srcreg, off); - if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT) - set_mem_align (dst, 4 * BITS_PER_UNIT); - if (src_align_bytes >= 0) - { - unsigned int src_align = 0; - if ((src_align_bytes & 3) == (align_bytes & 3)) - src_align = 4; - else if ((src_align_bytes & 1) == (align_bytes & 1)) - src_align = 2; - if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) - set_mem_align (src, src_align * BITS_PER_UNIT); - } - off = 4; - emit_insn (gen_strmov (destreg, dst, srcreg, src)); - } - dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off); - src = adjust_automodify_address_nv (src, BLKmode, srcreg, off); - if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) - set_mem_align (dst, desired_align * BITS_PER_UNIT); - if (src_align_bytes >= 0) - { - unsigned int src_align = 0; - if ((src_align_bytes & 7) == (align_bytes & 7)) - src_align = 8; - else if ((src_align_bytes & 3) == (align_bytes & 3)) - src_align = 4; - else if ((src_align_bytes & 1) == (align_bytes & 1)) - src_align = 2; - if (src_align > (unsigned int) desired_align) - src_align = desired_align; - if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) - set_mem_align (src, src_align * BITS_PER_UNIT); - } - if (dst_size) - set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes)); - if (src_size) - set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes)); - *srcp = src; - return dst; -} - -/* Set enough from DEST to align DEST known to by aligned by ALIGN to - DESIRED_ALIGNMENT. */ -static void -expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count, - int align, int desired_alignment) -{ - if (align <= 1 && desired_alignment > 1) - { - rtx label = ix86_expand_aligntest (destptr, 1, false); - destmem = change_address (destmem, QImode, destptr); - emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value))); - ix86_adjust_counter (count, 1); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (align <= 2 && desired_alignment > 2) - { - rtx label = ix86_expand_aligntest (destptr, 2, false); - destmem = change_address (destmem, HImode, destptr); - emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value))); - ix86_adjust_counter (count, 2); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (align <= 4 && desired_alignment > 4) - { - rtx label = ix86_expand_aligntest (destptr, 4, false); - destmem = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value))); - ix86_adjust_counter (count, 4); - emit_label (label); - LABEL_NUSES (label) = 1; - } - gcc_assert (desired_alignment <= 8); -} - -/* Set enough from DST to align DST known to by aligned by ALIGN to - DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */ -static rtx -expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value, - int desired_align, int align_bytes) -{ - int off = 0; - rtx dst_size = MEM_SIZE (dst); - if (align_bytes & 1) - { - dst = adjust_automodify_address_nv (dst, QImode, destreg, 0); - off = 1; - emit_insn (gen_strset (destreg, dst, - gen_lowpart (QImode, value))); - } - if (align_bytes & 2) - { - dst = adjust_automodify_address_nv (dst, HImode, destreg, off); - if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT) - set_mem_align (dst, 2 * BITS_PER_UNIT); - off = 2; - emit_insn (gen_strset (destreg, dst, - gen_lowpart (HImode, value))); - } - if (align_bytes & 4) - { - dst = adjust_automodify_address_nv (dst, SImode, destreg, off); - if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT) - set_mem_align (dst, 4 * BITS_PER_UNIT); - off = 4; - emit_insn (gen_strset (destreg, dst, - gen_lowpart (SImode, value))); - } - dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off); - if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) - set_mem_align (dst, desired_align * BITS_PER_UNIT); - if (dst_size) - set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes)); - return dst; -} - -/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ -static enum stringop_alg -decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, - int *dynamic_check) -{ - const struct stringop_algs * algs; - bool optimize_for_speed; - /* Algorithms using the rep prefix want at least edi and ecx; - additionally, memset wants eax and memcpy wants esi. Don't - consider such algorithms if the user has appropriated those - registers for their own purposes. */ - bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG] - || (memset - ? fixed_regs[AX_REG] : fixed_regs[SI_REG])); - -#define ALG_USABLE_P(alg) (rep_prefix_usable \ - || (alg != rep_prefix_1_byte \ - && alg != rep_prefix_4_byte \ - && alg != rep_prefix_8_byte)) - const struct processor_costs *cost; - - /* Even if the string operation call is cold, we still might spend a lot - of time processing large blocks. */ - if (optimize_function_for_size_p (cfun) - || (optimize_insn_for_size_p () - && expected_size != -1 && expected_size < 256)) - optimize_for_speed = false; - else - optimize_for_speed = true; - - cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; - - *dynamic_check = -1; - if (memset) - algs = &cost->memset[TARGET_64BIT != 0]; - else - algs = &cost->memcpy[TARGET_64BIT != 0]; - if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg)) - return stringop_alg; - /* rep; movq or rep; movl is the smallest variant. */ - else if (!optimize_for_speed) - { - if (!count || (count & 3)) - return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte; - else - return rep_prefix_usable ? rep_prefix_4_byte : loop; - } - /* Very tiny blocks are best handled via the loop, REP is expensive to setup. - */ - else if (expected_size != -1 && expected_size < 4) - return loop_1_byte; - else if (expected_size != -1) - { - unsigned int i; - enum stringop_alg alg = libcall; - for (i = 0; i < NAX_STRINGOP_ALGS; i++) - { - /* We get here if the algorithms that were not libcall-based - were rep-prefix based and we are unable to use rep prefixes - based on global register usage. Break out of the loop and - use the heuristic below. */ - if (algs->size[i].max == 0) - break; - if (algs->size[i].max >= expected_size || algs->size[i].max == -1) - { - enum stringop_alg candidate = algs->size[i].alg; - - if (candidate != libcall && ALG_USABLE_P (candidate)) - alg = candidate; - /* Honor TARGET_INLINE_ALL_STRINGOPS by picking - last non-libcall inline algorithm. */ - if (TARGET_INLINE_ALL_STRINGOPS) - { - /* When the current size is best to be copied by a libcall, - but we are still forced to inline, run the heuristic below - that will pick code for medium sized blocks. */ - if (alg != libcall) - return alg; - break; - } - else if (ALG_USABLE_P (candidate)) - return candidate; - } - } - gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable); - } - /* When asked to inline the call anyway, try to pick meaningful choice. - We look for maximal size of block that is faster to copy by hand and - take blocks of at most of that size guessing that average size will - be roughly half of the block. - - If this turns out to be bad, we might simply specify the preferred - choice in ix86_costs. */ - if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) - && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size))) - { - int max = -1; - enum stringop_alg alg; - int i; - bool any_alg_usable_p = true; - - for (i = 0; i < NAX_STRINGOP_ALGS; i++) - { - enum stringop_alg candidate = algs->size[i].alg; - any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate); - - if (candidate != libcall && candidate - && ALG_USABLE_P (candidate)) - max = algs->size[i].max; - } - /* If there aren't any usable algorithms, then recursing on - smaller sizes isn't going to find anything. Just return the - simple byte-at-a-time copy loop. */ - if (!any_alg_usable_p) - { - /* Pick something reasonable. */ - if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) - *dynamic_check = 128; - return loop_1_byte; - } - if (max == -1) - max = 4096; - alg = decide_alg (count, max / 2, memset, dynamic_check); - gcc_assert (*dynamic_check == -1); - gcc_assert (alg != libcall); - if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) - *dynamic_check = max; - return alg; - } - return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall; -#undef ALG_USABLE_P -} - -/* Decide on alignment. We know that the operand is already aligned to ALIGN - (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ -static int -decide_alignment (int align, - enum stringop_alg alg, - int expected_size) -{ - int desired_align = 0; - switch (alg) - { - case no_stringop: - gcc_unreachable (); - case loop: - case unrolled_loop: - desired_align = GET_MODE_SIZE (Pmode); - break; - case rep_prefix_8_byte: - desired_align = 8; - break; - case rep_prefix_4_byte: - /* PentiumPro has special logic triggering for 8 byte aligned blocks. - copying whole cacheline at once. */ - if (TARGET_PENTIUMPRO) - desired_align = 8; - else - desired_align = 4; - break; - case rep_prefix_1_byte: - /* PentiumPro has special logic triggering for 8 byte aligned blocks. - copying whole cacheline at once. */ - if (TARGET_PENTIUMPRO) - desired_align = 8; - else - desired_align = 1; - break; - case loop_1_byte: - desired_align = 1; - break; - case libcall: - return 0; - } - - if (optimize_size) - desired_align = 1; - if (desired_align < align) - desired_align = align; - if (expected_size != -1 && expected_size < 4) - desired_align = align; - return desired_align; -} - -/* Return the smallest power of 2 greater than VAL. */ -static int -smallest_pow2_greater_than (int val) -{ - int ret = 1; - while (ret <= val) - ret <<= 1; - return ret; -} - -/* Expand string move (memcpy) operation. Use i386 string operations when - profitable. expand_setmem contains similar code. The code depends upon - architecture, block size and alignment, but always has the same - overall structure: - - 1) Prologue guard: Conditional that jumps up to epilogues for small - blocks that can be handled by epilogue alone. This is faster but - also needed for correctness, since prologue assume the block is larger - than the desired alignment. - - Optional dynamic check for size and libcall for large - blocks is emitted here too, with -minline-stringops-dynamically. - - 2) Prologue: copy first few bytes in order to get destination aligned - to DESIRED_ALIGN. It is emitted only when ALIGN is less than - DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied. - We emit either a jump tree on power of two sized blocks, or a byte loop. - - 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks - with specified algorithm. - - 4) Epilogue: code copying tail of the block that is too small to be - handled by main body (or up to size guarded by prologue guard). */ - -int -ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, - rtx expected_align_exp, rtx expected_size_exp) -{ - rtx destreg; - rtx srcreg; - rtx label = NULL; - rtx tmp; - rtx jump_around_label = NULL; - HOST_WIDE_INT align = 1; - unsigned HOST_WIDE_INT count = 0; - HOST_WIDE_INT expected_size = -1; - int size_needed = 0, epilogue_size_needed; - int desired_align = 0, align_bytes = 0; - enum stringop_alg alg; - int dynamic_check; - bool need_zero_guard = false; - - if (CONST_INT_P (align_exp)) - align = INTVAL (align_exp); - /* i386 can do misaligned access on reasonably increased cost. */ - if (CONST_INT_P (expected_align_exp) - && INTVAL (expected_align_exp) > align) - align = INTVAL (expected_align_exp); - /* ALIGN is the minimum of destination and source alignment, but we care here - just about destination alignment. */ - else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) - align = MEM_ALIGN (dst) / BITS_PER_UNIT; - - if (CONST_INT_P (count_exp)) - count = expected_size = INTVAL (count_exp); - if (CONST_INT_P (expected_size_exp) && count == 0) - expected_size = INTVAL (expected_size_exp); - - /* Make sure we don't need to care about overflow later on. */ - if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) - return 0; - - /* Step 0: Decide on preferred algorithm, desired alignment and - size of chunks to be copied by main loop. */ - - alg = decide_alg (count, expected_size, false, &dynamic_check); - desired_align = decide_alignment (align, alg, expected_size); - - if (!TARGET_ALIGN_STRINGOPS) - align = desired_align; - - if (alg == libcall) - return 0; - gcc_assert (alg != no_stringop); - if (!count) - count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); - destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); - srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); - switch (alg) - { - case libcall: - case no_stringop: - gcc_unreachable (); - case loop: - need_zero_guard = true; - size_needed = GET_MODE_SIZE (Pmode); - break; - case unrolled_loop: - need_zero_guard = true; - size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2); - break; - case rep_prefix_8_byte: - size_needed = 8; - break; - case rep_prefix_4_byte: - size_needed = 4; - break; - case rep_prefix_1_byte: - size_needed = 1; - break; - case loop_1_byte: - need_zero_guard = true; - size_needed = 1; - break; - } - - epilogue_size_needed = size_needed; - - /* Step 1: Prologue guard. */ - - /* Alignment code needs count to be in register. */ - if (CONST_INT_P (count_exp) && desired_align > align) - { - if (INTVAL (count_exp) > desired_align - && INTVAL (count_exp) > size_needed) - { - align_bytes - = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); - if (align_bytes <= 0) - align_bytes = 0; - else - align_bytes = desired_align - align_bytes; - } - if (align_bytes == 0) - count_exp = force_reg (counter_mode (count_exp), count_exp); - } - gcc_assert (desired_align >= 1 && align >= 1); - - /* Ensure that alignment prologue won't copy past end of block. */ - if (size_needed > 1 || (desired_align > 1 && desired_align > align)) - { - epilogue_size_needed = MAX (size_needed - 1, desired_align - align); - /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. - Make sure it is power of 2. */ - epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); - - if (count) - { - if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) - { - /* If main algorithm works on QImode, no epilogue is needed. - For small sizes just don't align anything. */ - if (size_needed == 1) - desired_align = align; - else - goto epilogue; - } - } - else - { - label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (epilogue_size_needed), - LTU, 0, counter_mode (count_exp), 1, label); - if (expected_size == -1 || expected_size < epilogue_size_needed) - predict_jump (REG_BR_PROB_BASE * 60 / 100); - else - predict_jump (REG_BR_PROB_BASE * 20 / 100); - } - } - - /* Emit code to decide on runtime whether library call or inline should be - used. */ - if (dynamic_check != -1) - { - if (CONST_INT_P (count_exp)) - { - if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) - { - emit_block_move_via_libcall (dst, src, count_exp, false); - count_exp = const0_rtx; - goto epilogue; - } - } - else - { - rtx hot_label = gen_label_rtx (); - jump_around_label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), - LEU, 0, GET_MODE (count_exp), 1, hot_label); - predict_jump (REG_BR_PROB_BASE * 90 / 100); - emit_block_move_via_libcall (dst, src, count_exp, false); - emit_jump (jump_around_label); - emit_label (hot_label); - } - } - - /* Step 2: Alignment prologue. */ - - if (desired_align > align) - { - if (align_bytes == 0) - { - /* Except for the first move in epilogue, we no longer know - constant offset in aliasing info. It don't seems to worth - the pain to maintain it for the first move, so throw away - the info early. */ - src = change_address (src, BLKmode, srcreg); - dst = change_address (dst, BLKmode, destreg); - expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align, - desired_align); - } - else - { - /* If we know how many bytes need to be stored before dst is - sufficiently aligned, maintain aliasing info accurately. */ - dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg, - desired_align, align_bytes); - count_exp = plus_constant (count_exp, -align_bytes); - count -= align_bytes; - } - if (need_zero_guard - && (count < (unsigned HOST_WIDE_INT) size_needed - || (align_bytes == 0 - && count < ((unsigned HOST_WIDE_INT) size_needed - + desired_align - align)))) - { - /* It is possible that we copied enough so the main loop will not - execute. */ - gcc_assert (size_needed > 1); - if (label == NULL_RTX) - label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (size_needed), - LTU, 0, counter_mode (count_exp), 1, label); - if (expected_size == -1 - || expected_size < (desired_align - align) / 2 + size_needed) - predict_jump (REG_BR_PROB_BASE * 20 / 100); - else - predict_jump (REG_BR_PROB_BASE * 60 / 100); - } - } - if (label && size_needed == 1) - { - emit_label (label); - LABEL_NUSES (label) = 1; - label = NULL; - epilogue_size_needed = 1; - } - else if (label == NULL_RTX) - epilogue_size_needed = size_needed; - - /* Step 3: Main loop. */ - - switch (alg) - { - case libcall: - case no_stringop: - gcc_unreachable (); - case loop_1_byte: - expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, - count_exp, QImode, 1, expected_size); - break; - case loop: - expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, - count_exp, Pmode, 1, expected_size); - break; - case unrolled_loop: - /* Unroll only by factor of 2 in 32bit mode, since we don't have enough - registers for 4 temporaries anyway. */ - expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, - count_exp, Pmode, TARGET_64BIT ? 4 : 2, - expected_size); - break; - case rep_prefix_8_byte: - expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, - DImode); - break; - case rep_prefix_4_byte: - expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, - SImode); - break; - case rep_prefix_1_byte: - expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, - QImode); - break; - } - /* Adjust properly the offset of src and dest memory for aliasing. */ - if (CONST_INT_P (count_exp)) - { - src = adjust_automodify_address_nv (src, BLKmode, srcreg, - (count / size_needed) * size_needed); - dst = adjust_automodify_address_nv (dst, BLKmode, destreg, - (count / size_needed) * size_needed); - } - else - { - src = change_address (src, BLKmode, srcreg); - dst = change_address (dst, BLKmode, destreg); - } - - /* Step 4: Epilogue to copy the remaining bytes. */ - epilogue: - if (label) - { - /* When the main loop is done, COUNT_EXP might hold original count, - while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. - Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED - bytes. Compensate if needed. */ - - if (size_needed < epilogue_size_needed) - { - tmp = - expand_simple_binop (counter_mode (count_exp), AND, count_exp, - GEN_INT (size_needed - 1), count_exp, 1, - OPTAB_DIRECT); - if (tmp != count_exp) - emit_move_insn (count_exp, tmp); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - - if (count_exp != const0_rtx && epilogue_size_needed > 1) - expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp, - epilogue_size_needed); - if (jump_around_label) - emit_label (jump_around_label); - return 1; -} - -/* Helper function for memcpy. For QImode value 0xXY produce - 0xXYXYXYXY of wide specified by MODE. This is essentially - a * 0x10101010, but we can do slightly better than - synth_mult by unwinding the sequence by hand on CPUs with - slow multiply. */ -static rtx -promote_duplicated_reg (enum machine_mode mode, rtx val) -{ - enum machine_mode valmode = GET_MODE (val); - rtx tmp; - int nops = mode == DImode ? 3 : 2; - - gcc_assert (mode == SImode || mode == DImode); - if (val == const0_rtx) - return copy_to_mode_reg (mode, const0_rtx); - if (CONST_INT_P (val)) - { - HOST_WIDE_INT v = INTVAL (val) & 255; - - v |= v << 8; - v |= v << 16; - if (mode == DImode) - v |= (v << 16) << 16; - return copy_to_mode_reg (mode, gen_int_mode (v, mode)); - } - - if (valmode == VOIDmode) - valmode = QImode; - if (valmode != QImode) - val = gen_lowpart (QImode, val); - if (mode == QImode) - return val; - if (!TARGET_PARTIAL_REG_STALL) - nops--; - if (ix86_cost->mult_init[mode == DImode ? 3 : 2] - + ix86_cost->mult_bit * (mode == DImode ? 8 : 4) - <= (ix86_cost->shift_const + ix86_cost->add) * nops - + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) - { - rtx reg = convert_modes (mode, QImode, val, true); - tmp = promote_duplicated_reg (mode, const1_rtx); - return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, - OPTAB_DIRECT); - } - else - { - rtx reg = convert_modes (mode, QImode, val, true); - - if (!TARGET_PARTIAL_REG_STALL) - if (mode == SImode) - emit_insn (gen_movsi_insv_1 (reg, reg)); - else - emit_insn (gen_movdi_insv_1_rex64 (reg, reg)); - else - { - tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), - NULL, 1, OPTAB_DIRECT); - reg = - expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); - } - tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), - NULL, 1, OPTAB_DIRECT); - reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); - if (mode == SImode) - return reg; - tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), - NULL, 1, OPTAB_DIRECT); - reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); - return reg; - } -} - -/* Duplicate value VAL using promote_duplicated_reg into maximal size that will - be needed by main loop copying SIZE_NEEDED chunks and prologue getting - alignment from ALIGN to DESIRED_ALIGN. */ -static rtx -promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align) -{ - rtx promoted_val; - - if (TARGET_64BIT - && (size_needed > 4 || (desired_align > align && desired_align > 4))) - promoted_val = promote_duplicated_reg (DImode, val); - else if (size_needed > 2 || (desired_align > align && desired_align > 2)) - promoted_val = promote_duplicated_reg (SImode, val); - else if (size_needed > 1 || (desired_align > align && desired_align > 1)) - promoted_val = promote_duplicated_reg (HImode, val); - else - promoted_val = val; - - return promoted_val; -} - -/* Expand string clear operation (bzero). Use i386 string operations when - profitable. See expand_movmem comment for explanation of individual - steps performed. */ -int -ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, - rtx expected_align_exp, rtx expected_size_exp) -{ - rtx destreg; - rtx label = NULL; - rtx tmp; - rtx jump_around_label = NULL; - HOST_WIDE_INT align = 1; - unsigned HOST_WIDE_INT count = 0; - HOST_WIDE_INT expected_size = -1; - int size_needed = 0, epilogue_size_needed; - int desired_align = 0, align_bytes = 0; - enum stringop_alg alg; - rtx promoted_val = NULL; - bool force_loopy_epilogue = false; - int dynamic_check; - bool need_zero_guard = false; - - if (CONST_INT_P (align_exp)) - align = INTVAL (align_exp); - /* i386 can do misaligned access on reasonably increased cost. */ - if (CONST_INT_P (expected_align_exp) - && INTVAL (expected_align_exp) > align) - align = INTVAL (expected_align_exp); - if (CONST_INT_P (count_exp)) - count = expected_size = INTVAL (count_exp); - if (CONST_INT_P (expected_size_exp) && count == 0) - expected_size = INTVAL (expected_size_exp); - - /* Make sure we don't need to care about overflow later on. */ - if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) - return 0; - - /* Step 0: Decide on preferred algorithm, desired alignment and - size of chunks to be copied by main loop. */ - - alg = decide_alg (count, expected_size, true, &dynamic_check); - desired_align = decide_alignment (align, alg, expected_size); - - if (!TARGET_ALIGN_STRINGOPS) - align = desired_align; - - if (alg == libcall) - return 0; - gcc_assert (alg != no_stringop); - if (!count) - count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp); - destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); - switch (alg) - { - case libcall: - case no_stringop: - gcc_unreachable (); - case loop: - need_zero_guard = true; - size_needed = GET_MODE_SIZE (Pmode); - break; - case unrolled_loop: - need_zero_guard = true; - size_needed = GET_MODE_SIZE (Pmode) * 4; - break; - case rep_prefix_8_byte: - size_needed = 8; - break; - case rep_prefix_4_byte: - size_needed = 4; - break; - case rep_prefix_1_byte: - size_needed = 1; - break; - case loop_1_byte: - need_zero_guard = true; - size_needed = 1; - break; - } - epilogue_size_needed = size_needed; - - /* Step 1: Prologue guard. */ - - /* Alignment code needs count to be in register. */ - if (CONST_INT_P (count_exp) && desired_align > align) - { - if (INTVAL (count_exp) > desired_align - && INTVAL (count_exp) > size_needed) - { - align_bytes - = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); - if (align_bytes <= 0) - align_bytes = 0; - else - align_bytes = desired_align - align_bytes; - } - if (align_bytes == 0) - { - enum machine_mode mode = SImode; - if (TARGET_64BIT && (count & ~0xffffffff)) - mode = DImode; - count_exp = force_reg (mode, count_exp); - } - } - /* Do the cheap promotion to allow better CSE across the - main loop and epilogue (ie one load of the big constant in the - front of all code. */ - if (CONST_INT_P (val_exp)) - promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, - desired_align, align); - /* Ensure that alignment prologue won't copy past end of block. */ - if (size_needed > 1 || (desired_align > 1 && desired_align > align)) - { - epilogue_size_needed = MAX (size_needed - 1, desired_align - align); - /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes. - Make sure it is power of 2. */ - epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); - - /* To improve performance of small blocks, we jump around the VAL - promoting mode. This mean that if the promoted VAL is not constant, - we might not use it in the epilogue and have to use byte - loop variant. */ - if (epilogue_size_needed > 2 && !promoted_val) - force_loopy_epilogue = true; - if (count) - { - if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) - { - /* If main algorithm works on QImode, no epilogue is needed. - For small sizes just don't align anything. */ - if (size_needed == 1) - desired_align = align; - else - goto epilogue; - } - } - else - { - label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (epilogue_size_needed), - LTU, 0, counter_mode (count_exp), 1, label); - if (expected_size == -1 || expected_size <= epilogue_size_needed) - predict_jump (REG_BR_PROB_BASE * 60 / 100); - else - predict_jump (REG_BR_PROB_BASE * 20 / 100); - } - } - if (dynamic_check != -1) - { - rtx hot_label = gen_label_rtx (); - jump_around_label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), - LEU, 0, counter_mode (count_exp), 1, hot_label); - predict_jump (REG_BR_PROB_BASE * 90 / 100); - set_storage_via_libcall (dst, count_exp, val_exp, false); - emit_jump (jump_around_label); - emit_label (hot_label); - } - - /* Step 2: Alignment prologue. */ - - /* Do the expensive promotion once we branched off the small blocks. */ - if (!promoted_val) - promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, - desired_align, align); - gcc_assert (desired_align >= 1 && align >= 1); - - if (desired_align > align) - { - if (align_bytes == 0) - { - /* Except for the first move in epilogue, we no longer know - constant offset in aliasing info. It don't seems to worth - the pain to maintain it for the first move, so throw away - the info early. */ - dst = change_address (dst, BLKmode, destreg); - expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align, - desired_align); - } - else - { - /* If we know how many bytes need to be stored before dst is - sufficiently aligned, maintain aliasing info accurately. */ - dst = expand_constant_setmem_prologue (dst, destreg, promoted_val, - desired_align, align_bytes); - count_exp = plus_constant (count_exp, -align_bytes); - count -= align_bytes; - } - if (need_zero_guard - && (count < (unsigned HOST_WIDE_INT) size_needed - || (align_bytes == 0 - && count < ((unsigned HOST_WIDE_INT) size_needed - + desired_align - align)))) - { - /* It is possible that we copied enough so the main loop will not - execute. */ - gcc_assert (size_needed > 1); - if (label == NULL_RTX) - label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (size_needed), - LTU, 0, counter_mode (count_exp), 1, label); - if (expected_size == -1 - || expected_size < (desired_align - align) / 2 + size_needed) - predict_jump (REG_BR_PROB_BASE * 20 / 100); - else - predict_jump (REG_BR_PROB_BASE * 60 / 100); - } - } - if (label && size_needed == 1) - { - emit_label (label); - LABEL_NUSES (label) = 1; - label = NULL; - promoted_val = val_exp; - epilogue_size_needed = 1; - } - else if (label == NULL_RTX) - epilogue_size_needed = size_needed; - - /* Step 3: Main loop. */ - - switch (alg) - { - case libcall: - case no_stringop: - gcc_unreachable (); - case loop_1_byte: - expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, - count_exp, QImode, 1, expected_size); - break; - case loop: - expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, - count_exp, Pmode, 1, expected_size); - break; - case unrolled_loop: - expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, - count_exp, Pmode, 4, expected_size); - break; - case rep_prefix_8_byte: - expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, - DImode, val_exp); - break; - case rep_prefix_4_byte: - expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, - SImode, val_exp); - break; - case rep_prefix_1_byte: - expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, - QImode, val_exp); - break; - } - /* Adjust properly the offset of src and dest memory for aliasing. */ - if (CONST_INT_P (count_exp)) - dst = adjust_automodify_address_nv (dst, BLKmode, destreg, - (count / size_needed) * size_needed); - else - dst = change_address (dst, BLKmode, destreg); - - /* Step 4: Epilogue to copy the remaining bytes. */ - - if (label) - { - /* When the main loop is done, COUNT_EXP might hold original count, - while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. - Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED - bytes. Compensate if needed. */ - - if (size_needed < epilogue_size_needed) - { - tmp = - expand_simple_binop (counter_mode (count_exp), AND, count_exp, - GEN_INT (size_needed - 1), count_exp, 1, - OPTAB_DIRECT); - if (tmp != count_exp) - emit_move_insn (count_exp, tmp); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - epilogue: - if (count_exp != const0_rtx && epilogue_size_needed > 1) - { - if (force_loopy_epilogue) - expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, - epilogue_size_needed); - else - expand_setmem_epilogue (dst, destreg, promoted_val, count_exp, - epilogue_size_needed); - } - if (jump_around_label) - emit_label (jump_around_label); - return 1; -} - -/* Expand the appropriate insns for doing strlen if not just doing - repnz; scasb - - out = result, initialized with the start address - align_rtx = alignment of the address. - scratch = scratch register, initialized with the startaddress when - not aligned, otherwise undefined - - This is just the body. It needs the initializations mentioned above and - some address computing at the end. These things are done in i386.md. */ - -static void -ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) -{ - int align; - rtx tmp; - rtx align_2_label = NULL_RTX; - rtx align_3_label = NULL_RTX; - rtx align_4_label = gen_label_rtx (); - rtx end_0_label = gen_label_rtx (); - rtx mem; - rtx tmpreg = gen_reg_rtx (SImode); - rtx scratch = gen_reg_rtx (SImode); - rtx cmp; - - align = 0; - if (CONST_INT_P (align_rtx)) - align = INTVAL (align_rtx); - - /* Loop to check 1..3 bytes for null to get an aligned pointer. */ - - /* Is there a known alignment and is it less than 4? */ - if (align < 4) - { - rtx scratch1 = gen_reg_rtx (Pmode); - emit_move_insn (scratch1, out); - /* Is there a known alignment and is it not 2? */ - if (align != 2) - { - align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ - align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ - - /* Leave just the 3 lower bits. */ - align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), - NULL_RTX, 0, OPTAB_WIDEN); - - emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, - Pmode, 1, align_4_label); - emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, - Pmode, 1, align_2_label); - emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, - Pmode, 1, align_3_label); - } - else - { - /* Since the alignment is 2, we have to check 2 or 0 bytes; - check if is aligned to 4 - byte. */ - - align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, - NULL_RTX, 0, OPTAB_WIDEN); - - emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, - Pmode, 1, align_4_label); - } - - mem = change_address (src, QImode, out); - - /* Now compare the bytes. */ - - /* Compare the first n unaligned byte on a byte per byte basis. */ - emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, - QImode, 1, end_0_label); - - /* Increment the address. */ - emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); - - /* Not needed with an alignment of 2 */ - if (align != 2) - { - emit_label (align_2_label); - - emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, - end_0_label); - - emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); - - emit_label (align_3_label); - } - - emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, - end_0_label); - - emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); - } - - /* Generate loop to check 4 bytes at a time. It is not a good idea to - align this loop. It gives only huge programs, but does not help to - speed up. */ - emit_label (align_4_label); - - mem = change_address (src, SImode, out); - emit_move_insn (scratch, mem); - emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4))); - - /* This formula yields a nonzero result iff one of the bytes is zero. - This saves three branches inside loop and many cycles. */ - - emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); - emit_insn (gen_one_cmplsi2 (scratch, scratch)); - emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); - emit_insn (gen_andsi3 (tmpreg, tmpreg, - gen_int_mode (0x80808080, SImode))); - emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, - align_4_label); - - if (TARGET_CMOVE) - { - rtx reg = gen_reg_rtx (SImode); - rtx reg2 = gen_reg_rtx (Pmode); - emit_move_insn (reg, tmpreg); - emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); - - /* If zero is not in the first two bytes, move two bytes forward. */ - emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); - tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); - emit_insn (gen_rtx_SET (VOIDmode, tmpreg, - gen_rtx_IF_THEN_ELSE (SImode, tmp, - reg, - tmpreg))); - /* Emit lea manually to avoid clobbering of flags. */ - emit_insn (gen_rtx_SET (SImode, reg2, - gen_rtx_PLUS (Pmode, out, const2_rtx))); - - tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); - emit_insn (gen_rtx_SET (VOIDmode, out, - gen_rtx_IF_THEN_ELSE (Pmode, tmp, - reg2, - out))); - - } - else - { - rtx end_2_label = gen_label_rtx (); - /* Is zero in the first two bytes? */ - - emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); - tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, end_2_label), - pc_rtx); - tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); - JUMP_LABEL (tmp) = end_2_label; - - /* Not in the first two. Move two bytes forward. */ - emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); - emit_insn ((*ix86_gen_add3) (out, out, const2_rtx)); - - emit_label (end_2_label); - - } - - /* Avoid branch in fixing the byte. */ - tmpreg = gen_lowpart (QImode, tmpreg); - emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); - cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); - emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp)); - - emit_label (end_0_label); -} - -/* Expand strlen. */ - -int -ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) -{ - rtx addr, scratch1, scratch2, scratch3, scratch4; - - /* The generic case of strlen expander is long. Avoid it's - expanding unless TARGET_INLINE_ALL_STRINGOPS. */ - - if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 - && !TARGET_INLINE_ALL_STRINGOPS - && !optimize_insn_for_size_p () - && (!CONST_INT_P (align) || INTVAL (align) < 4)) - return 0; - - addr = force_reg (Pmode, XEXP (src, 0)); - scratch1 = gen_reg_rtx (Pmode); - - if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 - && !optimize_insn_for_size_p ()) - { - /* Well it seems that some optimizer does not combine a call like - foo(strlen(bar), strlen(bar)); - when the move and the subtraction is done here. It does calculate - the length just once when these instructions are done inside of - output_strlen_unroll(). But I think since &bar[strlen(bar)] is - often used and I use one fewer register for the lifetime of - output_strlen_unroll() this is better. */ - - emit_move_insn (out, addr); - - ix86_expand_strlensi_unroll_1 (out, src, align); - - /* strlensi_unroll_1 returns the address of the zero at the end of - the string, like memchr(), so compute the length by subtracting - the start address. */ - emit_insn ((*ix86_gen_sub3) (out, out, addr)); - } - else - { - rtx unspec; - - /* Can't use this if the user has appropriated eax, ecx, or edi. */ - if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - return false; - - scratch2 = gen_reg_rtx (Pmode); - scratch3 = gen_reg_rtx (Pmode); - scratch4 = force_reg (Pmode, constm1_rtx); - - emit_move_insn (scratch3, addr); - eoschar = force_reg (QImode, eoschar); - - src = replace_equiv_address_nv (src, scratch3); - - /* If .md starts supporting :P, this can be done in .md. */ - unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, - scratch4), UNSPEC_SCAS); - emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); - emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1)); - emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx)); - } - return 1; -} - -/* For given symbol (function) construct code to compute address of it's PLT - entry in large x86-64 PIC model. */ -rtx -construct_plt_address (rtx symbol) -{ - rtx tmp = gen_reg_rtx (Pmode); - rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); - - gcc_assert (GET_CODE (symbol) == SYMBOL_REF); - gcc_assert (ix86_cmodel == CM_LARGE_PIC); - - emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); - emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx)); - return tmp; -} - -void -ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, - rtx callarg2, - rtx pop, int sibcall) -{ - rtx use = NULL, call; - - if (pop == const0_rtx) - pop = NULL; - gcc_assert (!TARGET_64BIT || !pop); - - if (TARGET_MACHO && !TARGET_64BIT) - { -#if TARGET_MACHO - if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) - fnaddr = machopic_indirect_call_target (fnaddr); -#endif - } - else - { - /* Static functions and indirect calls don't need the pic register. */ - if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC) - && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) - use_reg (&use, pic_offset_table_rtx); - } - - if (TARGET_64BIT && INTVAL (callarg2) >= 0) - { - rtx al = gen_rtx_REG (QImode, AX_REG); - emit_move_insn (al, callarg2); - use_reg (&use, al); - } - - if (ix86_cmodel == CM_LARGE_PIC - && GET_CODE (fnaddr) == MEM - && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) - fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); - else if (sibcall - ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode) - : !call_insn_operand (XEXP (fnaddr, 0), Pmode)) - { - fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); - fnaddr = gen_rtx_MEM (QImode, fnaddr); - } - - call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); - if (retval) - call = gen_rtx_SET (VOIDmode, retval, call); - if (pop) - { - pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); - pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); - call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); - } - if (TARGET_64BIT - && ix86_cfun_abi () == MS_ABI - && (!callarg2 || INTVAL (callarg2) != -2)) - { - /* We need to represent that SI and DI registers are clobbered - by SYSV calls. */ - static int clobbered_registers[] = { - XMM6_REG, XMM7_REG, XMM8_REG, - XMM9_REG, XMM10_REG, XMM11_REG, - XMM12_REG, XMM13_REG, XMM14_REG, - XMM15_REG, SI_REG, DI_REG - }; - unsigned int i; - rtx vec[ARRAY_SIZE (clobbered_registers) + 2]; - rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), - UNSPEC_MS_TO_SYSV_CALL); - - vec[0] = call; - vec[1] = unspec; - for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++) - vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i]) - ? TImode : DImode, - gen_rtx_REG - (SSE_REGNO_P (clobbered_registers[i]) - ? TImode : DImode, - clobbered_registers[i])); - - call = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec_v (ARRAY_SIZE (clobbered_registers) - + 2, vec)); - } - - call = emit_call_insn (call); - if (use) - CALL_INSN_FUNCTION_USAGE (call) = use; -} - - -/* Clear stack slot assignments remembered from previous functions. - This is called from INIT_EXPANDERS once before RTL is emitted for each - function. */ - -static struct machine_function * -ix86_init_machine_status (void) -{ - struct machine_function *f; - - f = GGC_CNEW (struct machine_function); - f->use_fast_prologue_epilogue_nregs = -1; - f->tls_descriptor_call_expanded_p = 0; - f->call_abi = ix86_abi; - - return f; -} - -/* Return a MEM corresponding to a stack slot with mode MODE. - Allocate a new slot if necessary. - - The RTL for a function can have several slots available: N is - which slot to use. */ - -rtx -assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) -{ - struct stack_local_entry *s; - - gcc_assert (n < MAX_386_STACK_LOCALS); - - /* Virtual slot is valid only before vregs are instantiated. */ - gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); - - for (s = ix86_stack_locals; s; s = s->next) - if (s->mode == mode && s->n == n) - return copy_rtx (s->rtl); - - s = (struct stack_local_entry *) - ggc_alloc (sizeof (struct stack_local_entry)); - s->n = n; - s->mode = mode; - s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); - - s->next = ix86_stack_locals; - ix86_stack_locals = s; - return s->rtl; -} - -/* Construct the SYMBOL_REF for the tls_get_addr function. */ - -static GTY(()) rtx ix86_tls_symbol; -rtx -ix86_tls_get_addr (void) -{ - - if (!ix86_tls_symbol) - { - ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, - (TARGET_ANY_GNU_TLS - && !TARGET_64BIT) - ? "___tls_get_addr" - : "__tls_get_addr"); - } - - return ix86_tls_symbol; -} - -/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ - -static GTY(()) rtx ix86_tls_module_base_symbol; -rtx -ix86_tls_module_base (void) -{ - - if (!ix86_tls_module_base_symbol) - { - ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, - "_TLS_MODULE_BASE_"); - SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) - |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; - } - - return ix86_tls_module_base_symbol; -} - -/* Calculate the length of the memory address in the instruction - encoding. Does not include the one-byte modrm, opcode, or prefix. */ - -int -memory_address_length (rtx addr) -{ - struct ix86_address parts; - rtx base, index, disp; - int len; - int ok; - - if (GET_CODE (addr) == PRE_DEC - || GET_CODE (addr) == POST_INC - || GET_CODE (addr) == PRE_MODIFY - || GET_CODE (addr) == POST_MODIFY) - return 0; - - ok = ix86_decompose_address (addr, &parts); - gcc_assert (ok); - - if (parts.base && GET_CODE (parts.base) == SUBREG) - parts.base = SUBREG_REG (parts.base); - if (parts.index && GET_CODE (parts.index) == SUBREG) - parts.index = SUBREG_REG (parts.index); - - base = parts.base; - index = parts.index; - disp = parts.disp; - len = 0; - - /* Rule of thumb: - - esp as the base always wants an index, - - ebp as the base always wants a displacement. */ - - /* Register Indirect. */ - if (base && !index && !disp) - { - /* esp (for its index) and ebp (for its displacement) need - the two-byte modrm form. */ - if (addr == stack_pointer_rtx - || addr == arg_pointer_rtx - || addr == frame_pointer_rtx - || addr == hard_frame_pointer_rtx) - len = 1; - } - - /* Direct Addressing. */ - else if (disp && !base && !index) - len = 4; - - else - { - /* Find the length of the displacement constant. */ - if (disp) - { - if (base && satisfies_constraint_K (disp)) - len = 1; - else - len = 4; - } - /* ebp always wants a displacement. */ - else if (base == hard_frame_pointer_rtx) - len = 1; - - /* An index requires the two-byte modrm form.... */ - if (index - /* ...like esp, which always wants an index. */ - || base == stack_pointer_rtx - || base == arg_pointer_rtx - || base == frame_pointer_rtx) - len += 1; - } - - return len; -} - -/* Compute default value for "length_immediate" attribute. When SHORTFORM - is set, expect that insn have 8bit immediate alternative. */ -int -ix86_attr_length_immediate_default (rtx insn, int shortform) -{ - int len = 0; - int i; - extract_insn_cached (insn); - for (i = recog_data.n_operands - 1; i >= 0; --i) - if (CONSTANT_P (recog_data.operand[i])) - { - gcc_assert (!len); - if (shortform && satisfies_constraint_K (recog_data.operand[i])) - len = 1; - else - { - switch (get_attr_mode (insn)) - { - case MODE_QI: - len+=1; - break; - case MODE_HI: - len+=2; - break; - case MODE_SI: - len+=4; - break; - /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ - case MODE_DI: - len+=4; - break; - default: - fatal_insn ("unknown insn mode", insn); - } - } - } - return len; -} -/* Compute default value for "length_address" attribute. */ -int -ix86_attr_length_address_default (rtx insn) -{ - int i; - - if (get_attr_type (insn) == TYPE_LEA) - { - rtx set = PATTERN (insn); - - if (GET_CODE (set) == PARALLEL) - set = XVECEXP (set, 0, 0); - - gcc_assert (GET_CODE (set) == SET); - - return memory_address_length (SET_SRC (set)); - } - - extract_insn_cached (insn); - for (i = recog_data.n_operands - 1; i >= 0; --i) - if (MEM_P (recog_data.operand[i])) - { - return memory_address_length (XEXP (recog_data.operand[i], 0)); - break; - } - return 0; -} - -/* Compute default value for "length_vex" attribute. It includes - 2 or 3 byte VEX prefix and 1 opcode byte. */ - -int -ix86_attr_length_vex_default (rtx insn, int has_0f_opcode, - int has_vex_w) -{ - int i; - - /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 - byte VEX prefix. */ - if (!has_0f_opcode || has_vex_w) - return 3 + 1; - - /* We can always use 2 byte VEX prefix in 32bit. */ - if (!TARGET_64BIT) - return 2 + 1; - - extract_insn_cached (insn); - - for (i = recog_data.n_operands - 1; i >= 0; --i) - if (REG_P (recog_data.operand[i])) - { - /* REX.W bit uses 3 byte VEX prefix. */ - if (GET_MODE (recog_data.operand[i]) == DImode) - return 3 + 1; - } - else - { - /* REX.X or REX.B bits use 3 byte VEX prefix. */ - if (MEM_P (recog_data.operand[i]) - && x86_extended_reg_mentioned_p (recog_data.operand[i])) - return 3 + 1; - } - - return 2 + 1; -} - -/* Return the maximum number of instructions a cpu can issue. */ - -static int -ix86_issue_rate (void) -{ - switch (ix86_tune) - { - case PROCESSOR_PENTIUM: - case PROCESSOR_ATOM: - case PROCESSOR_K6: - return 2; - - case PROCESSOR_PENTIUMPRO: - case PROCESSOR_PENTIUM4: - case PROCESSOR_ATHLON: - case PROCESSOR_K8: - case PROCESSOR_AMDFAM10: - case PROCESSOR_NOCONA: - case PROCESSOR_GENERIC32: - case PROCESSOR_GENERIC64: - return 3; - - case PROCESSOR_CORE2: - return 4; - - default: - return 1; - } -} - -/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set - by DEP_INSN and nothing set by DEP_INSN. */ - -static int -ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) -{ - rtx set, set2; - - /* Simplify the test for uninteresting insns. */ - if (insn_type != TYPE_SETCC - && insn_type != TYPE_ICMOV - && insn_type != TYPE_FCMOV - && insn_type != TYPE_IBR) - return 0; - - if ((set = single_set (dep_insn)) != 0) - { - set = SET_DEST (set); - set2 = NULL_RTX; - } - else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL - && XVECLEN (PATTERN (dep_insn), 0) == 2 - && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET - && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) - { - set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); - set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); - } - else - return 0; - - if (!REG_P (set) || REGNO (set) != FLAGS_REG) - return 0; - - /* This test is true if the dependent insn reads the flags but - not any other potentially set register. */ - if (!reg_overlap_mentioned_p (set, PATTERN (insn))) - return 0; - - if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) - return 0; - - return 1; -} - -/* Return true iff USE_INSN has a memory address with operands set by - SET_INSN. */ - -bool -ix86_agi_dependent (rtx set_insn, rtx use_insn) -{ - int i; - extract_insn_cached (use_insn); - for (i = recog_data.n_operands - 1; i >= 0; --i) - if (MEM_P (recog_data.operand[i])) - { - rtx addr = XEXP (recog_data.operand[i], 0); - return modified_in_p (addr, set_insn) != 0; - } - return false; -} - -static int -ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) -{ - enum attr_type insn_type, dep_insn_type; - enum attr_memory memory; - rtx set, set2; - int dep_insn_code_number; - - /* Anti and output dependencies have zero cost on all CPUs. */ - if (REG_NOTE_KIND (link) != 0) - return 0; - - dep_insn_code_number = recog_memoized (dep_insn); - - /* If we can't recognize the insns, we can't really do anything. */ - if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) - return cost; - - insn_type = get_attr_type (insn); - dep_insn_type = get_attr_type (dep_insn); - - switch (ix86_tune) - { - case PROCESSOR_PENTIUM: - /* Address Generation Interlock adds a cycle of latency. */ - if (insn_type == TYPE_LEA) - { - rtx addr = PATTERN (insn); - - if (GET_CODE (addr) == PARALLEL) - addr = XVECEXP (addr, 0, 0); - - gcc_assert (GET_CODE (addr) == SET); - - addr = SET_SRC (addr); - if (modified_in_p (addr, dep_insn)) - cost += 1; - } - else if (ix86_agi_dependent (dep_insn, insn)) - cost += 1; - - /* ??? Compares pair with jump/setcc. */ - if (ix86_flags_dependent (insn, dep_insn, insn_type)) - cost = 0; - - /* Floating point stores require value to be ready one cycle earlier. */ - if (insn_type == TYPE_FMOV - && get_attr_memory (insn) == MEMORY_STORE - && !ix86_agi_dependent (dep_insn, insn)) - cost += 1; - break; - - case PROCESSOR_PENTIUMPRO: - memory = get_attr_memory (insn); - - /* INT->FP conversion is expensive. */ - if (get_attr_fp_int_src (dep_insn)) - cost += 5; - - /* There is one cycle extra latency between an FP op and a store. */ - if (insn_type == TYPE_FMOV - && (set = single_set (dep_insn)) != NULL_RTX - && (set2 = single_set (insn)) != NULL_RTX - && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) - && MEM_P (SET_DEST (set2))) - cost += 1; - - /* Show ability of reorder buffer to hide latency of load by executing - in parallel with previous instruction in case - previous instruction is not needed to compute the address. */ - if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) - && !ix86_agi_dependent (dep_insn, insn)) - { - /* Claim moves to take one cycle, as core can issue one load - at time and the next load can start cycle later. */ - if (dep_insn_type == TYPE_IMOV - || dep_insn_type == TYPE_FMOV) - cost = 1; - else if (cost > 1) - cost--; - } - break; - - case PROCESSOR_K6: - memory = get_attr_memory (insn); - - /* The esp dependency is resolved before the instruction is really - finished. */ - if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) - && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) - return 1; - - /* INT->FP conversion is expensive. */ - if (get_attr_fp_int_src (dep_insn)) - cost += 5; - - /* Show ability of reorder buffer to hide latency of load by executing - in parallel with previous instruction in case - previous instruction is not needed to compute the address. */ - if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) - && !ix86_agi_dependent (dep_insn, insn)) - { - /* Claim moves to take one cycle, as core can issue one load - at time and the next load can start cycle later. */ - if (dep_insn_type == TYPE_IMOV - || dep_insn_type == TYPE_FMOV) - cost = 1; - else if (cost > 2) - cost -= 2; - else - cost = 1; - } - break; - - case PROCESSOR_ATHLON: - case PROCESSOR_K8: - case PROCESSOR_AMDFAM10: - case PROCESSOR_ATOM: - case PROCESSOR_GENERIC32: - case PROCESSOR_GENERIC64: - memory = get_attr_memory (insn); - - /* Show ability of reorder buffer to hide latency of load by executing - in parallel with previous instruction in case - previous instruction is not needed to compute the address. */ - if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) - && !ix86_agi_dependent (dep_insn, insn)) - { - enum attr_unit unit = get_attr_unit (insn); - int loadcost = 3; - - /* Because of the difference between the length of integer and - floating unit pipeline preparation stages, the memory operands - for floating point are cheaper. - - ??? For Athlon it the difference is most probably 2. */ - if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) - loadcost = 3; - else - loadcost = TARGET_ATHLON ? 2 : 0; - - if (cost >= loadcost) - cost -= loadcost; - else - cost = 0; - } - - default: - break; - } - - return cost; -} - -/* How many alternative schedules to try. This should be as wide as the - scheduling freedom in the DFA, but no wider. Making this value too - large results extra work for the scheduler. */ - -static int -ia32_multipass_dfa_lookahead (void) -{ - switch (ix86_tune) - { - case PROCESSOR_PENTIUM: - return 2; - - case PROCESSOR_PENTIUMPRO: - case PROCESSOR_K6: - return 1; - - default: - return 0; - } -} - - -/* Compute the alignment given to a constant that is being placed in memory. - EXP is the constant and ALIGN is the alignment that the object would - ordinarily have. - The value of this function is used instead of that alignment to align - the object. */ - -int -ix86_constant_alignment (tree exp, int align) -{ - if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST - || TREE_CODE (exp) == INTEGER_CST) - { - if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) - return 64; - else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) - return 128; - } - else if (!optimize_size && TREE_CODE (exp) == STRING_CST - && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) - return BITS_PER_WORD; - - return align; -} - -/* Compute the alignment for a static variable. - TYPE is the data type, and ALIGN is the alignment that - the object would ordinarily have. The value of this function is used - instead of that alignment to align the object. */ - -int -ix86_data_alignment (tree type, int align) -{ - int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); - - if (AGGREGATE_TYPE_P (type) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) - && align < max_align) - align = max_align; - - /* x86-64 ABI requires arrays greater than 16 bytes to be aligned - to 16byte boundary. */ - if (TARGET_64BIT) - { - if (AGGREGATE_TYPE_P (type) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) - return 128; - } - - if (TREE_CODE (type) == ARRAY_TYPE) - { - if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) - return 64; - if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) - return 128; - } - else if (TREE_CODE (type) == COMPLEX_TYPE) - { - - if (TYPE_MODE (type) == DCmode && align < 64) - return 64; - if ((TYPE_MODE (type) == XCmode - || TYPE_MODE (type) == TCmode) && align < 128) - return 128; - } - else if ((TREE_CODE (type) == RECORD_TYPE - || TREE_CODE (type) == UNION_TYPE - || TREE_CODE (type) == QUAL_UNION_TYPE) - && TYPE_FIELDS (type)) - { - if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) - return 64; - if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) - return 128; - } - else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE - || TREE_CODE (type) == INTEGER_TYPE) - { - if (TYPE_MODE (type) == DFmode && align < 64) - return 64; - if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) - return 128; - } - - return align; -} - -/* Compute the alignment for a local variable or a stack slot. EXP is - the data type or decl itself, MODE is the widest mode available and - ALIGN is the alignment that the object would ordinarily have. The - value of this macro is used instead of that alignment to align the - object. */ - -unsigned int -ix86_local_alignment (tree exp, enum machine_mode mode, - unsigned int align) -{ - tree type, decl; - - if (exp && DECL_P (exp)) - { - type = TREE_TYPE (exp); - decl = exp; - } - else - { - type = exp; - decl = NULL; - } - - /* Don't do dynamic stack realignment for long long objects with - -mpreferred-stack-boundary=2. */ - if (!TARGET_64BIT - && align == 64 - && ix86_preferred_stack_boundary < 64 - && (mode == DImode || (type && TYPE_MODE (type) == DImode)) - && (!type || !TYPE_USER_ALIGN (type)) - && (!decl || !DECL_USER_ALIGN (decl))) - align = 32; - - /* If TYPE is NULL, we are allocating a stack slot for caller-save - register in MODE. We will return the largest alignment of XF - and DF. */ - if (!type) - { - if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) - align = GET_MODE_ALIGNMENT (DFmode); - return align; - } - - /* x86-64 ABI requires arrays greater than 16 bytes to be aligned - to 16byte boundary. */ - if (TARGET_64BIT) - { - if (AGGREGATE_TYPE_P (type) - && TYPE_SIZE (type) - && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST - && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 - || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) - return 128; - } - if (TREE_CODE (type) == ARRAY_TYPE) - { - if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) - return 64; - if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) - return 128; - } - else if (TREE_CODE (type) == COMPLEX_TYPE) - { - if (TYPE_MODE (type) == DCmode && align < 64) - return 64; - if ((TYPE_MODE (type) == XCmode - || TYPE_MODE (type) == TCmode) && align < 128) - return 128; - } - else if ((TREE_CODE (type) == RECORD_TYPE - || TREE_CODE (type) == UNION_TYPE - || TREE_CODE (type) == QUAL_UNION_TYPE) - && TYPE_FIELDS (type)) - { - if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) - return 64; - if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) - return 128; - } - else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE - || TREE_CODE (type) == INTEGER_TYPE) - { - - if (TYPE_MODE (type) == DFmode && align < 64) - return 64; - if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) - return 128; - } - return align; -} - -/* Compute the minimum required alignment for dynamic stack realignment - purposes for a local variable, parameter or a stack slot. EXP is - the data type or decl itself, MODE is its mode and ALIGN is the - alignment that the object would ordinarily have. */ - -unsigned int -ix86_minimum_alignment (tree exp, enum machine_mode mode, - unsigned int align) -{ - tree type, decl; - - if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) - return align; - - if (exp && DECL_P (exp)) - { - type = TREE_TYPE (exp); - decl = exp; - } - else - { - type = exp; - decl = NULL; - } - - /* Don't do dynamic stack realignment for long long objects with - -mpreferred-stack-boundary=2. */ - if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) - && (!type || !TYPE_USER_ALIGN (type)) - && (!decl || !DECL_USER_ALIGN (decl))) - return 32; - - return align; -} - -/* Emit RTL insns to initialize the variable parts of a trampoline. - FNADDR is an RTX for the address of the function's pure code. - CXT is an RTX for the static chain value for the function. */ -void -x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) -{ - if (!TARGET_64BIT) - { - /* Compute offset from the end of the jmp to the target function. */ - rtx disp = expand_binop (SImode, sub_optab, fnaddr, - plus_constant (tramp, 10), - NULL_RTX, 1, OPTAB_DIRECT); - emit_move_insn (gen_rtx_MEM (QImode, tramp), - gen_int_mode (0xb9, QImode)); - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); - emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), - gen_int_mode (0xe9, QImode)); - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); - } - else - { - int offset = 0; - /* Try to load address using shorter movl instead of movabs. - We may want to support movq for kernel mode, but kernel does not use - trampolines at the moment. */ - if (x86_64_zext_immediate_operand (fnaddr, VOIDmode)) - { - fnaddr = copy_to_mode_reg (DImode, fnaddr); - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - gen_int_mode (0xbb41, HImode)); - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), - gen_lowpart (SImode, fnaddr)); - offset += 6; - } - else - { - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - gen_int_mode (0xbb49, HImode)); - emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), - fnaddr); - offset += 10; - } - /* Load static chain using movabs to r10. */ - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - gen_int_mode (0xba49, HImode)); - emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), - cxt); - offset += 10; - /* Jump to the r11 */ - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), - gen_int_mode (0xff49, HImode)); - emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), - gen_int_mode (0xe3, QImode)); - offset += 3; - gcc_assert (offset <= TRAMPOLINE_SIZE); - } - -#ifdef ENABLE_EXECUTE_STACK - emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), - LCT_NORMAL, VOIDmode, 1, tramp, Pmode); -#endif -} - -/* Codes for all the SSE/MMX builtins. */ -enum ix86_builtins -{ - IX86_BUILTIN_ADDPS, - IX86_BUILTIN_ADDSS, - IX86_BUILTIN_DIVPS, - IX86_BUILTIN_DIVSS, - IX86_BUILTIN_MULPS, - IX86_BUILTIN_MULSS, - IX86_BUILTIN_SUBPS, - IX86_BUILTIN_SUBSS, - - IX86_BUILTIN_CMPEQPS, - IX86_BUILTIN_CMPLTPS, - IX86_BUILTIN_CMPLEPS, - IX86_BUILTIN_CMPGTPS, - IX86_BUILTIN_CMPGEPS, - IX86_BUILTIN_CMPNEQPS, - IX86_BUILTIN_CMPNLTPS, - IX86_BUILTIN_CMPNLEPS, - IX86_BUILTIN_CMPNGTPS, - IX86_BUILTIN_CMPNGEPS, - IX86_BUILTIN_CMPORDPS, - IX86_BUILTIN_CMPUNORDPS, - IX86_BUILTIN_CMPEQSS, - IX86_BUILTIN_CMPLTSS, - IX86_BUILTIN_CMPLESS, - IX86_BUILTIN_CMPNEQSS, - IX86_BUILTIN_CMPNLTSS, - IX86_BUILTIN_CMPNLESS, - IX86_BUILTIN_CMPNGTSS, - IX86_BUILTIN_CMPNGESS, - IX86_BUILTIN_CMPORDSS, - IX86_BUILTIN_CMPUNORDSS, - - IX86_BUILTIN_COMIEQSS, - IX86_BUILTIN_COMILTSS, - IX86_BUILTIN_COMILESS, - IX86_BUILTIN_COMIGTSS, - IX86_BUILTIN_COMIGESS, - IX86_BUILTIN_COMINEQSS, - IX86_BUILTIN_UCOMIEQSS, - IX86_BUILTIN_UCOMILTSS, - IX86_BUILTIN_UCOMILESS, - IX86_BUILTIN_UCOMIGTSS, - IX86_BUILTIN_UCOMIGESS, - IX86_BUILTIN_UCOMINEQSS, - - IX86_BUILTIN_CVTPI2PS, - IX86_BUILTIN_CVTPS2PI, - IX86_BUILTIN_CVTSI2SS, - IX86_BUILTIN_CVTSI642SS, - IX86_BUILTIN_CVTSS2SI, - IX86_BUILTIN_CVTSS2SI64, - IX86_BUILTIN_CVTTPS2PI, - IX86_BUILTIN_CVTTSS2SI, - IX86_BUILTIN_CVTTSS2SI64, - - IX86_BUILTIN_MAXPS, - IX86_BUILTIN_MAXSS, - IX86_BUILTIN_MINPS, - IX86_BUILTIN_MINSS, - - IX86_BUILTIN_LOADUPS, - IX86_BUILTIN_STOREUPS, - IX86_BUILTIN_MOVSS, - - IX86_BUILTIN_MOVHLPS, - IX86_BUILTIN_MOVLHPS, - IX86_BUILTIN_LOADHPS, - IX86_BUILTIN_LOADLPS, - IX86_BUILTIN_STOREHPS, - IX86_BUILTIN_STORELPS, - - IX86_BUILTIN_MASKMOVQ, - IX86_BUILTIN_MOVMSKPS, - IX86_BUILTIN_PMOVMSKB, - - IX86_BUILTIN_MOVNTPS, - IX86_BUILTIN_MOVNTQ, - - IX86_BUILTIN_LOADDQU, - IX86_BUILTIN_STOREDQU, - - IX86_BUILTIN_PACKSSWB, - IX86_BUILTIN_PACKSSDW, - IX86_BUILTIN_PACKUSWB, - - IX86_BUILTIN_PADDB, - IX86_BUILTIN_PADDW, - IX86_BUILTIN_PADDD, - IX86_BUILTIN_PADDQ, - IX86_BUILTIN_PADDSB, - IX86_BUILTIN_PADDSW, - IX86_BUILTIN_PADDUSB, - IX86_BUILTIN_PADDUSW, - IX86_BUILTIN_PSUBB, - IX86_BUILTIN_PSUBW, - IX86_BUILTIN_PSUBD, - IX86_BUILTIN_PSUBQ, - IX86_BUILTIN_PSUBSB, - IX86_BUILTIN_PSUBSW, - IX86_BUILTIN_PSUBUSB, - IX86_BUILTIN_PSUBUSW, - - IX86_BUILTIN_PAND, - IX86_BUILTIN_PANDN, - IX86_BUILTIN_POR, - IX86_BUILTIN_PXOR, - - IX86_BUILTIN_PAVGB, - IX86_BUILTIN_PAVGW, - - IX86_BUILTIN_PCMPEQB, - IX86_BUILTIN_PCMPEQW, - IX86_BUILTIN_PCMPEQD, - IX86_BUILTIN_PCMPGTB, - IX86_BUILTIN_PCMPGTW, - IX86_BUILTIN_PCMPGTD, - - IX86_BUILTIN_PMADDWD, - - IX86_BUILTIN_PMAXSW, - IX86_BUILTIN_PMAXUB, - IX86_BUILTIN_PMINSW, - IX86_BUILTIN_PMINUB, - - IX86_BUILTIN_PMULHUW, - IX86_BUILTIN_PMULHW, - IX86_BUILTIN_PMULLW, - - IX86_BUILTIN_PSADBW, - IX86_BUILTIN_PSHUFW, - - IX86_BUILTIN_PSLLW, - IX86_BUILTIN_PSLLD, - IX86_BUILTIN_PSLLQ, - IX86_BUILTIN_PSRAW, - IX86_BUILTIN_PSRAD, - IX86_BUILTIN_PSRLW, - IX86_BUILTIN_PSRLD, - IX86_BUILTIN_PSRLQ, - IX86_BUILTIN_PSLLWI, - IX86_BUILTIN_PSLLDI, - IX86_BUILTIN_PSLLQI, - IX86_BUILTIN_PSRAWI, - IX86_BUILTIN_PSRADI, - IX86_BUILTIN_PSRLWI, - IX86_BUILTIN_PSRLDI, - IX86_BUILTIN_PSRLQI, - - IX86_BUILTIN_PUNPCKHBW, - IX86_BUILTIN_PUNPCKHWD, - IX86_BUILTIN_PUNPCKHDQ, - IX86_BUILTIN_PUNPCKLBW, - IX86_BUILTIN_PUNPCKLWD, - IX86_BUILTIN_PUNPCKLDQ, - - IX86_BUILTIN_SHUFPS, - - IX86_BUILTIN_RCPPS, - IX86_BUILTIN_RCPSS, - IX86_BUILTIN_RSQRTPS, - IX86_BUILTIN_RSQRTPS_NR, - IX86_BUILTIN_RSQRTSS, - IX86_BUILTIN_RSQRTF, - IX86_BUILTIN_SQRTPS, - IX86_BUILTIN_SQRTPS_NR, - IX86_BUILTIN_SQRTSS, - - IX86_BUILTIN_UNPCKHPS, - IX86_BUILTIN_UNPCKLPS, - - IX86_BUILTIN_ANDPS, - IX86_BUILTIN_ANDNPS, - IX86_BUILTIN_ORPS, - IX86_BUILTIN_XORPS, - - IX86_BUILTIN_EMMS, - IX86_BUILTIN_LDMXCSR, - IX86_BUILTIN_STMXCSR, - IX86_BUILTIN_SFENCE, - - /* 3DNow! Original */ - IX86_BUILTIN_FEMMS, - IX86_BUILTIN_PAVGUSB, - IX86_BUILTIN_PF2ID, - IX86_BUILTIN_PFACC, - IX86_BUILTIN_PFADD, - IX86_BUILTIN_PFCMPEQ, - IX86_BUILTIN_PFCMPGE, - IX86_BUILTIN_PFCMPGT, - IX86_BUILTIN_PFMAX, - IX86_BUILTIN_PFMIN, - IX86_BUILTIN_PFMUL, - IX86_BUILTIN_PFRCP, - IX86_BUILTIN_PFRCPIT1, - IX86_BUILTIN_PFRCPIT2, - IX86_BUILTIN_PFRSQIT1, - IX86_BUILTIN_PFRSQRT, - IX86_BUILTIN_PFSUB, - IX86_BUILTIN_PFSUBR, - IX86_BUILTIN_PI2FD, - IX86_BUILTIN_PMULHRW, - - /* 3DNow! Athlon Extensions */ - IX86_BUILTIN_PF2IW, - IX86_BUILTIN_PFNACC, - IX86_BUILTIN_PFPNACC, - IX86_BUILTIN_PI2FW, - IX86_BUILTIN_PSWAPDSI, - IX86_BUILTIN_PSWAPDSF, - - /* SSE2 */ - IX86_BUILTIN_ADDPD, - IX86_BUILTIN_ADDSD, - IX86_BUILTIN_DIVPD, - IX86_BUILTIN_DIVSD, - IX86_BUILTIN_MULPD, - IX86_BUILTIN_MULSD, - IX86_BUILTIN_SUBPD, - IX86_BUILTIN_SUBSD, - - IX86_BUILTIN_CMPEQPD, - IX86_BUILTIN_CMPLTPD, - IX86_BUILTIN_CMPLEPD, - IX86_BUILTIN_CMPGTPD, - IX86_BUILTIN_CMPGEPD, - IX86_BUILTIN_CMPNEQPD, - IX86_BUILTIN_CMPNLTPD, - IX86_BUILTIN_CMPNLEPD, - IX86_BUILTIN_CMPNGTPD, - IX86_BUILTIN_CMPNGEPD, - IX86_BUILTIN_CMPORDPD, - IX86_BUILTIN_CMPUNORDPD, - IX86_BUILTIN_CMPEQSD, - IX86_BUILTIN_CMPLTSD, - IX86_BUILTIN_CMPLESD, - IX86_BUILTIN_CMPNEQSD, - IX86_BUILTIN_CMPNLTSD, - IX86_BUILTIN_CMPNLESD, - IX86_BUILTIN_CMPORDSD, - IX86_BUILTIN_CMPUNORDSD, - - IX86_BUILTIN_COMIEQSD, - IX86_BUILTIN_COMILTSD, - IX86_BUILTIN_COMILESD, - IX86_BUILTIN_COMIGTSD, - IX86_BUILTIN_COMIGESD, - IX86_BUILTIN_COMINEQSD, - IX86_BUILTIN_UCOMIEQSD, - IX86_BUILTIN_UCOMILTSD, - IX86_BUILTIN_UCOMILESD, - IX86_BUILTIN_UCOMIGTSD, - IX86_BUILTIN_UCOMIGESD, - IX86_BUILTIN_UCOMINEQSD, - - IX86_BUILTIN_MAXPD, - IX86_BUILTIN_MAXSD, - IX86_BUILTIN_MINPD, - IX86_BUILTIN_MINSD, - - IX86_BUILTIN_ANDPD, - IX86_BUILTIN_ANDNPD, - IX86_BUILTIN_ORPD, - IX86_BUILTIN_XORPD, - - IX86_BUILTIN_SQRTPD, - IX86_BUILTIN_SQRTSD, - - IX86_BUILTIN_UNPCKHPD, - IX86_BUILTIN_UNPCKLPD, - - IX86_BUILTIN_SHUFPD, - - IX86_BUILTIN_LOADUPD, - IX86_BUILTIN_STOREUPD, - IX86_BUILTIN_MOVSD, - - IX86_BUILTIN_LOADHPD, - IX86_BUILTIN_LOADLPD, - - IX86_BUILTIN_CVTDQ2PD, - IX86_BUILTIN_CVTDQ2PS, - - IX86_BUILTIN_CVTPD2DQ, - IX86_BUILTIN_CVTPD2PI, - IX86_BUILTIN_CVTPD2PS, - IX86_BUILTIN_CVTTPD2DQ, - IX86_BUILTIN_CVTTPD2PI, - - IX86_BUILTIN_CVTPI2PD, - IX86_BUILTIN_CVTSI2SD, - IX86_BUILTIN_CVTSI642SD, - - IX86_BUILTIN_CVTSD2SI, - IX86_BUILTIN_CVTSD2SI64, - IX86_BUILTIN_CVTSD2SS, - IX86_BUILTIN_CVTSS2SD, - IX86_BUILTIN_CVTTSD2SI, - IX86_BUILTIN_CVTTSD2SI64, - - IX86_BUILTIN_CVTPS2DQ, - IX86_BUILTIN_CVTPS2PD, - IX86_BUILTIN_CVTTPS2DQ, - - IX86_BUILTIN_MOVNTI, - IX86_BUILTIN_MOVNTPD, - IX86_BUILTIN_MOVNTDQ, - - IX86_BUILTIN_MOVQ128, - - /* SSE2 MMX */ - IX86_BUILTIN_MASKMOVDQU, - IX86_BUILTIN_MOVMSKPD, - IX86_BUILTIN_PMOVMSKB128, - - IX86_BUILTIN_PACKSSWB128, - IX86_BUILTIN_PACKSSDW128, - IX86_BUILTIN_PACKUSWB128, - - IX86_BUILTIN_PADDB128, - IX86_BUILTIN_PADDW128, - IX86_BUILTIN_PADDD128, - IX86_BUILTIN_PADDQ128, - IX86_BUILTIN_PADDSB128, - IX86_BUILTIN_PADDSW128, - IX86_BUILTIN_PADDUSB128, - IX86_BUILTIN_PADDUSW128, - IX86_BUILTIN_PSUBB128, - IX86_BUILTIN_PSUBW128, - IX86_BUILTIN_PSUBD128, - IX86_BUILTIN_PSUBQ128, - IX86_BUILTIN_PSUBSB128, - IX86_BUILTIN_PSUBSW128, - IX86_BUILTIN_PSUBUSB128, - IX86_BUILTIN_PSUBUSW128, - - IX86_BUILTIN_PAND128, - IX86_BUILTIN_PANDN128, - IX86_BUILTIN_POR128, - IX86_BUILTIN_PXOR128, - - IX86_BUILTIN_PAVGB128, - IX86_BUILTIN_PAVGW128, - - IX86_BUILTIN_PCMPEQB128, - IX86_BUILTIN_PCMPEQW128, - IX86_BUILTIN_PCMPEQD128, - IX86_BUILTIN_PCMPGTB128, - IX86_BUILTIN_PCMPGTW128, - IX86_BUILTIN_PCMPGTD128, - - IX86_BUILTIN_PMADDWD128, - - IX86_BUILTIN_PMAXSW128, - IX86_BUILTIN_PMAXUB128, - IX86_BUILTIN_PMINSW128, - IX86_BUILTIN_PMINUB128, - - IX86_BUILTIN_PMULUDQ, - IX86_BUILTIN_PMULUDQ128, - IX86_BUILTIN_PMULHUW128, - IX86_BUILTIN_PMULHW128, - IX86_BUILTIN_PMULLW128, - - IX86_BUILTIN_PSADBW128, - IX86_BUILTIN_PSHUFHW, - IX86_BUILTIN_PSHUFLW, - IX86_BUILTIN_PSHUFD, - - IX86_BUILTIN_PSLLDQI128, - IX86_BUILTIN_PSLLWI128, - IX86_BUILTIN_PSLLDI128, - IX86_BUILTIN_PSLLQI128, - IX86_BUILTIN_PSRAWI128, - IX86_BUILTIN_PSRADI128, - IX86_BUILTIN_PSRLDQI128, - IX86_BUILTIN_PSRLWI128, - IX86_BUILTIN_PSRLDI128, - IX86_BUILTIN_PSRLQI128, - - IX86_BUILTIN_PSLLDQ128, - IX86_BUILTIN_PSLLW128, - IX86_BUILTIN_PSLLD128, - IX86_BUILTIN_PSLLQ128, - IX86_BUILTIN_PSRAW128, - IX86_BUILTIN_PSRAD128, - IX86_BUILTIN_PSRLW128, - IX86_BUILTIN_PSRLD128, - IX86_BUILTIN_PSRLQ128, - - IX86_BUILTIN_PUNPCKHBW128, - IX86_BUILTIN_PUNPCKHWD128, - IX86_BUILTIN_PUNPCKHDQ128, - IX86_BUILTIN_PUNPCKHQDQ128, - IX86_BUILTIN_PUNPCKLBW128, - IX86_BUILTIN_PUNPCKLWD128, - IX86_BUILTIN_PUNPCKLDQ128, - IX86_BUILTIN_PUNPCKLQDQ128, - - IX86_BUILTIN_CLFLUSH, - IX86_BUILTIN_MFENCE, - IX86_BUILTIN_LFENCE, - - /* SSE3. */ - IX86_BUILTIN_ADDSUBPS, - IX86_BUILTIN_HADDPS, - IX86_BUILTIN_HSUBPS, - IX86_BUILTIN_MOVSHDUP, - IX86_BUILTIN_MOVSLDUP, - IX86_BUILTIN_ADDSUBPD, - IX86_BUILTIN_HADDPD, - IX86_BUILTIN_HSUBPD, - IX86_BUILTIN_LDDQU, - - IX86_BUILTIN_MONITOR, - IX86_BUILTIN_MWAIT, - - /* SSSE3. */ - IX86_BUILTIN_PHADDW, - IX86_BUILTIN_PHADDD, - IX86_BUILTIN_PHADDSW, - IX86_BUILTIN_PHSUBW, - IX86_BUILTIN_PHSUBD, - IX86_BUILTIN_PHSUBSW, - IX86_BUILTIN_PMADDUBSW, - IX86_BUILTIN_PMULHRSW, - IX86_BUILTIN_PSHUFB, - IX86_BUILTIN_PSIGNB, - IX86_BUILTIN_PSIGNW, - IX86_BUILTIN_PSIGND, - IX86_BUILTIN_PALIGNR, - IX86_BUILTIN_PABSB, - IX86_BUILTIN_PABSW, - IX86_BUILTIN_PABSD, - - IX86_BUILTIN_PHADDW128, - IX86_BUILTIN_PHADDD128, - IX86_BUILTIN_PHADDSW128, - IX86_BUILTIN_PHSUBW128, - IX86_BUILTIN_PHSUBD128, - IX86_BUILTIN_PHSUBSW128, - IX86_BUILTIN_PMADDUBSW128, - IX86_BUILTIN_PMULHRSW128, - IX86_BUILTIN_PSHUFB128, - IX86_BUILTIN_PSIGNB128, - IX86_BUILTIN_PSIGNW128, - IX86_BUILTIN_PSIGND128, - IX86_BUILTIN_PALIGNR128, - IX86_BUILTIN_PABSB128, - IX86_BUILTIN_PABSW128, - IX86_BUILTIN_PABSD128, - - /* AMDFAM10 - SSE4A New Instructions. */ - IX86_BUILTIN_MOVNTSD, - IX86_BUILTIN_MOVNTSS, - IX86_BUILTIN_EXTRQI, - IX86_BUILTIN_EXTRQ, - IX86_BUILTIN_INSERTQI, - IX86_BUILTIN_INSERTQ, - - /* SSE4.1. */ - IX86_BUILTIN_BLENDPD, - IX86_BUILTIN_BLENDPS, - IX86_BUILTIN_BLENDVPD, - IX86_BUILTIN_BLENDVPS, - IX86_BUILTIN_PBLENDVB128, - IX86_BUILTIN_PBLENDW128, - - IX86_BUILTIN_DPPD, - IX86_BUILTIN_DPPS, - - IX86_BUILTIN_INSERTPS128, - - IX86_BUILTIN_MOVNTDQA, - IX86_BUILTIN_MPSADBW128, - IX86_BUILTIN_PACKUSDW128, - IX86_BUILTIN_PCMPEQQ, - IX86_BUILTIN_PHMINPOSUW128, - - IX86_BUILTIN_PMAXSB128, - IX86_BUILTIN_PMAXSD128, - IX86_BUILTIN_PMAXUD128, - IX86_BUILTIN_PMAXUW128, - - IX86_BUILTIN_PMINSB128, - IX86_BUILTIN_PMINSD128, - IX86_BUILTIN_PMINUD128, - IX86_BUILTIN_PMINUW128, - - IX86_BUILTIN_PMOVSXBW128, - IX86_BUILTIN_PMOVSXBD128, - IX86_BUILTIN_PMOVSXBQ128, - IX86_BUILTIN_PMOVSXWD128, - IX86_BUILTIN_PMOVSXWQ128, - IX86_BUILTIN_PMOVSXDQ128, - - IX86_BUILTIN_PMOVZXBW128, - IX86_BUILTIN_PMOVZXBD128, - IX86_BUILTIN_PMOVZXBQ128, - IX86_BUILTIN_PMOVZXWD128, - IX86_BUILTIN_PMOVZXWQ128, - IX86_BUILTIN_PMOVZXDQ128, - - IX86_BUILTIN_PMULDQ128, - IX86_BUILTIN_PMULLD128, - - IX86_BUILTIN_ROUNDPD, - IX86_BUILTIN_ROUNDPS, - IX86_BUILTIN_ROUNDSD, - IX86_BUILTIN_ROUNDSS, - - IX86_BUILTIN_PTESTZ, - IX86_BUILTIN_PTESTC, - IX86_BUILTIN_PTESTNZC, - - IX86_BUILTIN_VEC_INIT_V2SI, - IX86_BUILTIN_VEC_INIT_V4HI, - IX86_BUILTIN_VEC_INIT_V8QI, - IX86_BUILTIN_VEC_EXT_V2DF, - IX86_BUILTIN_VEC_EXT_V2DI, - IX86_BUILTIN_VEC_EXT_V4SF, - IX86_BUILTIN_VEC_EXT_V4SI, - IX86_BUILTIN_VEC_EXT_V8HI, - IX86_BUILTIN_VEC_EXT_V2SI, - IX86_BUILTIN_VEC_EXT_V4HI, - IX86_BUILTIN_VEC_EXT_V16QI, - IX86_BUILTIN_VEC_SET_V2DI, - IX86_BUILTIN_VEC_SET_V4SF, - IX86_BUILTIN_VEC_SET_V4SI, - IX86_BUILTIN_VEC_SET_V8HI, - IX86_BUILTIN_VEC_SET_V4HI, - IX86_BUILTIN_VEC_SET_V16QI, - - IX86_BUILTIN_VEC_PACK_SFIX, - - /* SSE4.2. */ - IX86_BUILTIN_CRC32QI, - IX86_BUILTIN_CRC32HI, - IX86_BUILTIN_CRC32SI, - IX86_BUILTIN_CRC32DI, - - IX86_BUILTIN_PCMPESTRI128, - IX86_BUILTIN_PCMPESTRM128, - IX86_BUILTIN_PCMPESTRA128, - IX86_BUILTIN_PCMPESTRC128, - IX86_BUILTIN_PCMPESTRO128, - IX86_BUILTIN_PCMPESTRS128, - IX86_BUILTIN_PCMPESTRZ128, - IX86_BUILTIN_PCMPISTRI128, - IX86_BUILTIN_PCMPISTRM128, - IX86_BUILTIN_PCMPISTRA128, - IX86_BUILTIN_PCMPISTRC128, - IX86_BUILTIN_PCMPISTRO128, - IX86_BUILTIN_PCMPISTRS128, - IX86_BUILTIN_PCMPISTRZ128, - - IX86_BUILTIN_PCMPGTQ, - - /* AES instructions */ - IX86_BUILTIN_AESENC128, - IX86_BUILTIN_AESENCLAST128, - IX86_BUILTIN_AESDEC128, - IX86_BUILTIN_AESDECLAST128, - IX86_BUILTIN_AESIMC128, - IX86_BUILTIN_AESKEYGENASSIST128, - - /* PCLMUL instruction */ - IX86_BUILTIN_PCLMULQDQ128, - - /* AVX */ - IX86_BUILTIN_ADDPD256, - IX86_BUILTIN_ADDPS256, - IX86_BUILTIN_ADDSUBPD256, - IX86_BUILTIN_ADDSUBPS256, - IX86_BUILTIN_ANDPD256, - IX86_BUILTIN_ANDPS256, - IX86_BUILTIN_ANDNPD256, - IX86_BUILTIN_ANDNPS256, - IX86_BUILTIN_BLENDPD256, - IX86_BUILTIN_BLENDPS256, - IX86_BUILTIN_BLENDVPD256, - IX86_BUILTIN_BLENDVPS256, - IX86_BUILTIN_DIVPD256, - IX86_BUILTIN_DIVPS256, - IX86_BUILTIN_DPPS256, - IX86_BUILTIN_HADDPD256, - IX86_BUILTIN_HADDPS256, - IX86_BUILTIN_HSUBPD256, - IX86_BUILTIN_HSUBPS256, - IX86_BUILTIN_MAXPD256, - IX86_BUILTIN_MAXPS256, - IX86_BUILTIN_MINPD256, - IX86_BUILTIN_MINPS256, - IX86_BUILTIN_MULPD256, - IX86_BUILTIN_MULPS256, - IX86_BUILTIN_ORPD256, - IX86_BUILTIN_ORPS256, - IX86_BUILTIN_SHUFPD256, - IX86_BUILTIN_SHUFPS256, - IX86_BUILTIN_SUBPD256, - IX86_BUILTIN_SUBPS256, - IX86_BUILTIN_XORPD256, - IX86_BUILTIN_XORPS256, - IX86_BUILTIN_CMPSD, - IX86_BUILTIN_CMPSS, - IX86_BUILTIN_CMPPD, - IX86_BUILTIN_CMPPS, - IX86_BUILTIN_CMPPD256, - IX86_BUILTIN_CMPPS256, - IX86_BUILTIN_CVTDQ2PD256, - IX86_BUILTIN_CVTDQ2PS256, - IX86_BUILTIN_CVTPD2PS256, - IX86_BUILTIN_CVTPS2DQ256, - IX86_BUILTIN_CVTPS2PD256, - IX86_BUILTIN_CVTTPD2DQ256, - IX86_BUILTIN_CVTPD2DQ256, - IX86_BUILTIN_CVTTPS2DQ256, - IX86_BUILTIN_EXTRACTF128PD256, - IX86_BUILTIN_EXTRACTF128PS256, - IX86_BUILTIN_EXTRACTF128SI256, - IX86_BUILTIN_VZEROALL, - IX86_BUILTIN_VZEROUPPER, - IX86_BUILTIN_VZEROUPPER_REX64, - IX86_BUILTIN_VPERMILVARPD, - IX86_BUILTIN_VPERMILVARPS, - IX86_BUILTIN_VPERMILVARPD256, - IX86_BUILTIN_VPERMILVARPS256, - IX86_BUILTIN_VPERMILPD, - IX86_BUILTIN_VPERMILPS, - IX86_BUILTIN_VPERMILPD256, - IX86_BUILTIN_VPERMILPS256, - IX86_BUILTIN_VPERM2F128PD256, - IX86_BUILTIN_VPERM2F128PS256, - IX86_BUILTIN_VPERM2F128SI256, - IX86_BUILTIN_VBROADCASTSS, - IX86_BUILTIN_VBROADCASTSD256, - IX86_BUILTIN_VBROADCASTSS256, - IX86_BUILTIN_VBROADCASTPD256, - IX86_BUILTIN_VBROADCASTPS256, - IX86_BUILTIN_VINSERTF128PD256, - IX86_BUILTIN_VINSERTF128PS256, - IX86_BUILTIN_VINSERTF128SI256, - IX86_BUILTIN_LOADUPD256, - IX86_BUILTIN_LOADUPS256, - IX86_BUILTIN_STOREUPD256, - IX86_BUILTIN_STOREUPS256, - IX86_BUILTIN_LDDQU256, - IX86_BUILTIN_MOVNTDQ256, - IX86_BUILTIN_MOVNTPD256, - IX86_BUILTIN_MOVNTPS256, - IX86_BUILTIN_LOADDQU256, - IX86_BUILTIN_STOREDQU256, - IX86_BUILTIN_MASKLOADPD, - IX86_BUILTIN_MASKLOADPS, - IX86_BUILTIN_MASKSTOREPD, - IX86_BUILTIN_MASKSTOREPS, - IX86_BUILTIN_MASKLOADPD256, - IX86_BUILTIN_MASKLOADPS256, - IX86_BUILTIN_MASKSTOREPD256, - IX86_BUILTIN_MASKSTOREPS256, - IX86_BUILTIN_MOVSHDUP256, - IX86_BUILTIN_MOVSLDUP256, - IX86_BUILTIN_MOVDDUP256, - - IX86_BUILTIN_SQRTPD256, - IX86_BUILTIN_SQRTPS256, - IX86_BUILTIN_SQRTPS_NR256, - IX86_BUILTIN_RSQRTPS256, - IX86_BUILTIN_RSQRTPS_NR256, - - IX86_BUILTIN_RCPPS256, - - IX86_BUILTIN_ROUNDPD256, - IX86_BUILTIN_ROUNDPS256, - - IX86_BUILTIN_UNPCKHPD256, - IX86_BUILTIN_UNPCKLPD256, - IX86_BUILTIN_UNPCKHPS256, - IX86_BUILTIN_UNPCKLPS256, - - IX86_BUILTIN_SI256_SI, - IX86_BUILTIN_PS256_PS, - IX86_BUILTIN_PD256_PD, - IX86_BUILTIN_SI_SI256, - IX86_BUILTIN_PS_PS256, - IX86_BUILTIN_PD_PD256, - - IX86_BUILTIN_VTESTZPD, - IX86_BUILTIN_VTESTCPD, - IX86_BUILTIN_VTESTNZCPD, - IX86_BUILTIN_VTESTZPS, - IX86_BUILTIN_VTESTCPS, - IX86_BUILTIN_VTESTNZCPS, - IX86_BUILTIN_VTESTZPD256, - IX86_BUILTIN_VTESTCPD256, - IX86_BUILTIN_VTESTNZCPD256, - IX86_BUILTIN_VTESTZPS256, - IX86_BUILTIN_VTESTCPS256, - IX86_BUILTIN_VTESTNZCPS256, - IX86_BUILTIN_PTESTZ256, - IX86_BUILTIN_PTESTC256, - IX86_BUILTIN_PTESTNZC256, - - IX86_BUILTIN_MOVMSKPD256, - IX86_BUILTIN_MOVMSKPS256, - - /* TFmode support builtins. */ - IX86_BUILTIN_INFQ, - IX86_BUILTIN_FABSQ, - IX86_BUILTIN_COPYSIGNQ, - - /* SSE5 instructions */ - IX86_BUILTIN_FMADDSS, - IX86_BUILTIN_FMADDSD, - IX86_BUILTIN_FMADDPS, - IX86_BUILTIN_FMADDPD, - IX86_BUILTIN_FMSUBSS, - IX86_BUILTIN_FMSUBSD, - IX86_BUILTIN_FMSUBPS, - IX86_BUILTIN_FMSUBPD, - IX86_BUILTIN_FNMADDSS, - IX86_BUILTIN_FNMADDSD, - IX86_BUILTIN_FNMADDPS, - IX86_BUILTIN_FNMADDPD, - IX86_BUILTIN_FNMSUBSS, - IX86_BUILTIN_FNMSUBSD, - IX86_BUILTIN_FNMSUBPS, - IX86_BUILTIN_FNMSUBPD, - IX86_BUILTIN_PCMOV, - IX86_BUILTIN_PCMOV_V2DI, - IX86_BUILTIN_PCMOV_V4SI, - IX86_BUILTIN_PCMOV_V8HI, - IX86_BUILTIN_PCMOV_V16QI, - IX86_BUILTIN_PCMOV_V4SF, - IX86_BUILTIN_PCMOV_V2DF, - IX86_BUILTIN_PPERM, - IX86_BUILTIN_PERMPS, - IX86_BUILTIN_PERMPD, - IX86_BUILTIN_PMACSSWW, - IX86_BUILTIN_PMACSWW, - IX86_BUILTIN_PMACSSWD, - IX86_BUILTIN_PMACSWD, - IX86_BUILTIN_PMACSSDD, - IX86_BUILTIN_PMACSDD, - IX86_BUILTIN_PMACSSDQL, - IX86_BUILTIN_PMACSSDQH, - IX86_BUILTIN_PMACSDQL, - IX86_BUILTIN_PMACSDQH, - IX86_BUILTIN_PMADCSSWD, - IX86_BUILTIN_PMADCSWD, - IX86_BUILTIN_PHADDBW, - IX86_BUILTIN_PHADDBD, - IX86_BUILTIN_PHADDBQ, - IX86_BUILTIN_PHADDWD, - IX86_BUILTIN_PHADDWQ, - IX86_BUILTIN_PHADDDQ, - IX86_BUILTIN_PHADDUBW, - IX86_BUILTIN_PHADDUBD, - IX86_BUILTIN_PHADDUBQ, - IX86_BUILTIN_PHADDUWD, - IX86_BUILTIN_PHADDUWQ, - IX86_BUILTIN_PHADDUDQ, - IX86_BUILTIN_PHSUBBW, - IX86_BUILTIN_PHSUBWD, - IX86_BUILTIN_PHSUBDQ, - IX86_BUILTIN_PROTB, - IX86_BUILTIN_PROTW, - IX86_BUILTIN_PROTD, - IX86_BUILTIN_PROTQ, - IX86_BUILTIN_PROTB_IMM, - IX86_BUILTIN_PROTW_IMM, - IX86_BUILTIN_PROTD_IMM, - IX86_BUILTIN_PROTQ_IMM, - IX86_BUILTIN_PSHLB, - IX86_BUILTIN_PSHLW, - IX86_BUILTIN_PSHLD, - IX86_BUILTIN_PSHLQ, - IX86_BUILTIN_PSHAB, - IX86_BUILTIN_PSHAW, - IX86_BUILTIN_PSHAD, - IX86_BUILTIN_PSHAQ, - IX86_BUILTIN_FRCZSS, - IX86_BUILTIN_FRCZSD, - IX86_BUILTIN_FRCZPS, - IX86_BUILTIN_FRCZPD, - IX86_BUILTIN_CVTPH2PS, - IX86_BUILTIN_CVTPS2PH, - - IX86_BUILTIN_COMEQSS, - IX86_BUILTIN_COMNESS, - IX86_BUILTIN_COMLTSS, - IX86_BUILTIN_COMLESS, - IX86_BUILTIN_COMGTSS, - IX86_BUILTIN_COMGESS, - IX86_BUILTIN_COMUEQSS, - IX86_BUILTIN_COMUNESS, - IX86_BUILTIN_COMULTSS, - IX86_BUILTIN_COMULESS, - IX86_BUILTIN_COMUGTSS, - IX86_BUILTIN_COMUGESS, - IX86_BUILTIN_COMORDSS, - IX86_BUILTIN_COMUNORDSS, - IX86_BUILTIN_COMFALSESS, - IX86_BUILTIN_COMTRUESS, - - IX86_BUILTIN_COMEQSD, - IX86_BUILTIN_COMNESD, - IX86_BUILTIN_COMLTSD, - IX86_BUILTIN_COMLESD, - IX86_BUILTIN_COMGTSD, - IX86_BUILTIN_COMGESD, - IX86_BUILTIN_COMUEQSD, - IX86_BUILTIN_COMUNESD, - IX86_BUILTIN_COMULTSD, - IX86_BUILTIN_COMULESD, - IX86_BUILTIN_COMUGTSD, - IX86_BUILTIN_COMUGESD, - IX86_BUILTIN_COMORDSD, - IX86_BUILTIN_COMUNORDSD, - IX86_BUILTIN_COMFALSESD, - IX86_BUILTIN_COMTRUESD, - - IX86_BUILTIN_COMEQPS, - IX86_BUILTIN_COMNEPS, - IX86_BUILTIN_COMLTPS, - IX86_BUILTIN_COMLEPS, - IX86_BUILTIN_COMGTPS, - IX86_BUILTIN_COMGEPS, - IX86_BUILTIN_COMUEQPS, - IX86_BUILTIN_COMUNEPS, - IX86_BUILTIN_COMULTPS, - IX86_BUILTIN_COMULEPS, - IX86_BUILTIN_COMUGTPS, - IX86_BUILTIN_COMUGEPS, - IX86_BUILTIN_COMORDPS, - IX86_BUILTIN_COMUNORDPS, - IX86_BUILTIN_COMFALSEPS, - IX86_BUILTIN_COMTRUEPS, - - IX86_BUILTIN_COMEQPD, - IX86_BUILTIN_COMNEPD, - IX86_BUILTIN_COMLTPD, - IX86_BUILTIN_COMLEPD, - IX86_BUILTIN_COMGTPD, - IX86_BUILTIN_COMGEPD, - IX86_BUILTIN_COMUEQPD, - IX86_BUILTIN_COMUNEPD, - IX86_BUILTIN_COMULTPD, - IX86_BUILTIN_COMULEPD, - IX86_BUILTIN_COMUGTPD, - IX86_BUILTIN_COMUGEPD, - IX86_BUILTIN_COMORDPD, - IX86_BUILTIN_COMUNORDPD, - IX86_BUILTIN_COMFALSEPD, - IX86_BUILTIN_COMTRUEPD, - - IX86_BUILTIN_PCOMEQUB, - IX86_BUILTIN_PCOMNEUB, - IX86_BUILTIN_PCOMLTUB, - IX86_BUILTIN_PCOMLEUB, - IX86_BUILTIN_PCOMGTUB, - IX86_BUILTIN_PCOMGEUB, - IX86_BUILTIN_PCOMFALSEUB, - IX86_BUILTIN_PCOMTRUEUB, - IX86_BUILTIN_PCOMEQUW, - IX86_BUILTIN_PCOMNEUW, - IX86_BUILTIN_PCOMLTUW, - IX86_BUILTIN_PCOMLEUW, - IX86_BUILTIN_PCOMGTUW, - IX86_BUILTIN_PCOMGEUW, - IX86_BUILTIN_PCOMFALSEUW, - IX86_BUILTIN_PCOMTRUEUW, - IX86_BUILTIN_PCOMEQUD, - IX86_BUILTIN_PCOMNEUD, - IX86_BUILTIN_PCOMLTUD, - IX86_BUILTIN_PCOMLEUD, - IX86_BUILTIN_PCOMGTUD, - IX86_BUILTIN_PCOMGEUD, - IX86_BUILTIN_PCOMFALSEUD, - IX86_BUILTIN_PCOMTRUEUD, - IX86_BUILTIN_PCOMEQUQ, - IX86_BUILTIN_PCOMNEUQ, - IX86_BUILTIN_PCOMLTUQ, - IX86_BUILTIN_PCOMLEUQ, - IX86_BUILTIN_PCOMGTUQ, - IX86_BUILTIN_PCOMGEUQ, - IX86_BUILTIN_PCOMFALSEUQ, - IX86_BUILTIN_PCOMTRUEUQ, - - IX86_BUILTIN_PCOMEQB, - IX86_BUILTIN_PCOMNEB, - IX86_BUILTIN_PCOMLTB, - IX86_BUILTIN_PCOMLEB, - IX86_BUILTIN_PCOMGTB, - IX86_BUILTIN_PCOMGEB, - IX86_BUILTIN_PCOMFALSEB, - IX86_BUILTIN_PCOMTRUEB, - IX86_BUILTIN_PCOMEQW, - IX86_BUILTIN_PCOMNEW, - IX86_BUILTIN_PCOMLTW, - IX86_BUILTIN_PCOMLEW, - IX86_BUILTIN_PCOMGTW, - IX86_BUILTIN_PCOMGEW, - IX86_BUILTIN_PCOMFALSEW, - IX86_BUILTIN_PCOMTRUEW, - IX86_BUILTIN_PCOMEQD, - IX86_BUILTIN_PCOMNED, - IX86_BUILTIN_PCOMLTD, - IX86_BUILTIN_PCOMLED, - IX86_BUILTIN_PCOMGTD, - IX86_BUILTIN_PCOMGED, - IX86_BUILTIN_PCOMFALSED, - IX86_BUILTIN_PCOMTRUED, - IX86_BUILTIN_PCOMEQQ, - IX86_BUILTIN_PCOMNEQ, - IX86_BUILTIN_PCOMLTQ, - IX86_BUILTIN_PCOMLEQ, - IX86_BUILTIN_PCOMGTQ, - IX86_BUILTIN_PCOMGEQ, - IX86_BUILTIN_PCOMFALSEQ, - IX86_BUILTIN_PCOMTRUEQ, - - /* LWP instructions. */ - IX86_BUILTIN_LLWPCB, - IX86_BUILTIN_SLWPCB, - IX86_BUILTIN_LWPVAL32, - IX86_BUILTIN_LWPVAL64, - IX86_BUILTIN_LWPINS32, - IX86_BUILTIN_LWPINS64, - - IX86_BUILTIN_MAX -}; - -/* Table for the ix86 builtin decls. */ -static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; - -/* Table of all of the builtin functions that are possible with different ISA's - but are waiting to be built until a function is declared to use that - ISA. */ -struct builtin_isa GTY(()) -{ - tree type; /* builtin type to use in the declaration */ - const char *name; /* function name */ - int isa; /* isa_flags this builtin is defined for */ - bool const_p; /* true if the declaration is constant */ -}; - -static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; - - -/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK - * of which isa_flags to use in the ix86_builtins_isa array. Stores the - * function decl in the ix86_builtins array. Returns the function decl or - * NULL_TREE, if the builtin was not added. - * - * If the front end has a special hook for builtin functions, delay adding - * builtin functions that aren't in the current ISA until the ISA is changed - * with function specific optimization. Doing so, can save about 300K for the - * default compiler. When the builtin is expanded, check at that time whether - * it is valid. - * - * If the front end doesn't have a special hook, record all builtins, even if - * it isn't an instruction set in the current ISA in case the user uses - * function specific options for a different ISA, so that we don't get scope - * errors if a builtin is added in the middle of a function scope. */ - -static inline tree -def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) -{ - tree decl = NULL_TREE; - - if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) - { - ix86_builtins_isa[(int) code].isa = mask; - - if ((mask & ix86_isa_flags) != 0 - || (lang_hooks.builtin_function - == lang_hooks.builtin_function_ext_scope)) - - { - decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, - NULL_TREE); - ix86_builtins[(int) code] = decl; - ix86_builtins_isa[(int) code].type = NULL_TREE; - } - else - { - ix86_builtins[(int) code] = NULL_TREE; - ix86_builtins_isa[(int) code].const_p = false; - ix86_builtins_isa[(int) code].type = type; - ix86_builtins_isa[(int) code].name = name; - } - } - - return decl; -} - -/* Like def_builtin, but also marks the function decl "const". */ - -static inline tree -def_builtin_const (int mask, const char *name, tree type, - enum ix86_builtins code) -{ - tree decl = def_builtin (mask, name, type, code); - if (decl) - TREE_READONLY (decl) = 1; - else - ix86_builtins_isa[(int) code].const_p = true; - - return decl; -} - -/* Add any new builtin functions for a given ISA that may not have been - declared. This saves a bit of space compared to adding all of the - declarations to the tree, even if we didn't use them. */ - -static void -ix86_add_new_builtins (int isa) -{ - int i; - tree decl; - - for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) - { - if ((ix86_builtins_isa[i].isa & isa) != 0 - && ix86_builtins_isa[i].type != NULL_TREE) - { - decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, - ix86_builtins_isa[i].type, - i, BUILT_IN_MD, NULL, - NULL_TREE); - - ix86_builtins[i] = decl; - ix86_builtins_isa[i].type = NULL_TREE; - if (ix86_builtins_isa[i].const_p) - TREE_READONLY (decl) = 1; - } - } -} - -/* Bits for builtin_description.flag. */ - -/* Set when we don't support the comparison natively, and should - swap_comparison in order to support it. */ -#define BUILTIN_DESC_SWAP_OPERANDS 1 - -struct builtin_description -{ - const unsigned int mask; - const enum insn_code icode; - const char *const name; - const enum ix86_builtins code; - const enum rtx_code comparison; - const int flag; -}; - -static const struct builtin_description bdesc_comi[] = -{ - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, -}; - -static const struct builtin_description bdesc_pcmpestr[] = -{ - /* SSE4.2 */ - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode }, -}; - -static const struct builtin_description bdesc_pcmpistr[] = -{ - /* SSE4.2 */ - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode }, -}; - -/* Special builtin types */ -enum ix86_special_builtin_type -{ - SPECIAL_FTYPE_UNKNOWN, - VOID_FTYPE_VOID, - VOID_FTYPE_PVOID, - PVOID_FTYPE_VOID, - V32QI_FTYPE_PCCHAR, - V16QI_FTYPE_PCCHAR, - V8SF_FTYPE_PCV4SF, - V8SF_FTYPE_PCFLOAT, - V4DF_FTYPE_PCV2DF, - V4DF_FTYPE_PCDOUBLE, - V4SF_FTYPE_PCFLOAT, - V2DF_FTYPE_PCDOUBLE, - V8SF_FTYPE_PCV8SF_V8SF, - V4DF_FTYPE_PCV4DF_V4DF, - V4SF_FTYPE_V4SF_PCV2SF, - V4SF_FTYPE_PCV4SF_V4SF, - V2DF_FTYPE_V2DF_PCDOUBLE, - V2DF_FTYPE_PCV2DF_V2DF, - V2DI_FTYPE_PV2DI, - VOID_FTYPE_PV2SF_V4SF, - VOID_FTYPE_PV4DI_V4DI, - VOID_FTYPE_PV2DI_V2DI, - VOID_FTYPE_PCHAR_V32QI, - VOID_FTYPE_PCHAR_V16QI, - VOID_FTYPE_PFLOAT_V8SF, - VOID_FTYPE_PFLOAT_V4SF, - VOID_FTYPE_PDOUBLE_V4DF, - VOID_FTYPE_PDOUBLE_V2DF, - VOID_FTYPE_PDI_DI, - VOID_FTYPE_PINT_INT, - VOID_FTYPE_PV8SF_V8SF_V8SF, - VOID_FTYPE_PV4DF_V4DF_V4DF, - VOID_FTYPE_PV4SF_V4SF_V4SF, - VOID_FTYPE_PV2DF_V2DF_V2DF, - VOID_FTYPE_USHORT_UINT_USHORT, - VOID_FTYPE_UINT_UINT_UINT, - VOID_FTYPE_UINT64_UINT_UINT, - UCHAR_FTYPE_USHORT_UINT_USHORT, - UCHAR_FTYPE_UINT_UINT_UINT, - UCHAR_FTYPE_UINT64_UINT_UINT -}; - -/* Builtin types */ -enum ix86_builtin_type -{ - FTYPE_UNKNOWN, - FLOAT128_FTYPE_FLOAT128, - FLOAT_FTYPE_FLOAT, - FLOAT128_FTYPE_FLOAT128_FLOAT128, - INT_FTYPE_V8SF_V8SF_PTEST, - INT_FTYPE_V4DI_V4DI_PTEST, - INT_FTYPE_V4DF_V4DF_PTEST, - INT_FTYPE_V4SF_V4SF_PTEST, - INT_FTYPE_V2DI_V2DI_PTEST, - INT_FTYPE_V2DF_V2DF_PTEST, - INT64_FTYPE_V4SF, - INT64_FTYPE_V2DF, - INT_FTYPE_V16QI, - INT_FTYPE_V8QI, - INT_FTYPE_V8SF, - INT_FTYPE_V4DF, - INT_FTYPE_V4SF, - INT_FTYPE_V2DF, - V16QI_FTYPE_V16QI, - V8SI_FTYPE_V8SF, - V8SI_FTYPE_V4SI, - V8HI_FTYPE_V8HI, - V8HI_FTYPE_V16QI, - V8QI_FTYPE_V8QI, - V8SF_FTYPE_V8SF, - V8SF_FTYPE_V8SI, - V8SF_FTYPE_V4SF, - V4SI_FTYPE_V4SI, - V4SI_FTYPE_V16QI, - V4SI_FTYPE_V8SI, - V4SI_FTYPE_V8HI, - V4SI_FTYPE_V4DF, - V4SI_FTYPE_V4SF, - V4SI_FTYPE_V2DF, - V4HI_FTYPE_V4HI, - V4DF_FTYPE_V4DF, - V4DF_FTYPE_V4SI, - V4DF_FTYPE_V4SF, - V4DF_FTYPE_V2DF, - V4SF_FTYPE_V4DF, - V4SF_FTYPE_V4SF, - V4SF_FTYPE_V4SF_VEC_MERGE, - V4SF_FTYPE_V8SF, - V4SF_FTYPE_V4SI, - V4SF_FTYPE_V2DF, - V2DI_FTYPE_V2DI, - V2DI_FTYPE_V16QI, - V2DI_FTYPE_V8HI, - V2DI_FTYPE_V4SI, - V2DF_FTYPE_V2DF, - V2DF_FTYPE_V2DF_VEC_MERGE, - V2DF_FTYPE_V4SI, - V2DF_FTYPE_V4DF, - V2DF_FTYPE_V4SF, - V2DF_FTYPE_V2SI, - V2SI_FTYPE_V2SI, - V2SI_FTYPE_V4SF, - V2SI_FTYPE_V2SF, - V2SI_FTYPE_V2DF, - V2SF_FTYPE_V2SF, - V2SF_FTYPE_V2SI, - V16QI_FTYPE_V16QI_V16QI, - V16QI_FTYPE_V8HI_V8HI, - V8QI_FTYPE_V8QI_V8QI, - V8QI_FTYPE_V4HI_V4HI, - V8HI_FTYPE_V8HI_V8HI, - V8HI_FTYPE_V8HI_V8HI_COUNT, - V8HI_FTYPE_V16QI_V16QI, - V8HI_FTYPE_V4SI_V4SI, - V8HI_FTYPE_V8HI_SI_COUNT, - V8SF_FTYPE_V8SF_V8SF, - V8SF_FTYPE_V8SF_V8SI, - V4SI_FTYPE_V4SI_V4SI, - V4SI_FTYPE_V4SI_V4SI_COUNT, - V4SI_FTYPE_V8HI_V8HI, - V4SI_FTYPE_V4SF_V4SF, - V4SI_FTYPE_V2DF_V2DF, - V4SI_FTYPE_V4SI_SI_COUNT, - V4HI_FTYPE_V4HI_V4HI, - V4HI_FTYPE_V4HI_V4HI_COUNT, - V4HI_FTYPE_V8QI_V8QI, - V4HI_FTYPE_V2SI_V2SI, - V4HI_FTYPE_V4HI_SI_COUNT, - V4DF_FTYPE_V4DF_V4DF, - V4DF_FTYPE_V4DF_V4DI, - V4SF_FTYPE_V4SF_V4SF, - V4SF_FTYPE_V4SF_V4SF_SWAP, - V4SF_FTYPE_V4SF_V4SI, - V4SF_FTYPE_V4SF_V2SI, - V4SF_FTYPE_V4SF_V2DF, - V4SF_FTYPE_V4SF_DI, - V4SF_FTYPE_V4SF_SI, - V2DI_FTYPE_V2DI_V2DI, - V2DI_FTYPE_V2DI_V2DI_COUNT, - V2DI_FTYPE_V16QI_V16QI, - V2DI_FTYPE_V4SI_V4SI, - V2DI_FTYPE_V2DI_V16QI, - V2DI_FTYPE_V2DF_V2DF, - V2DI_FTYPE_V2DI_SI_COUNT, - V2SI_FTYPE_V2SI_V2SI, - V2SI_FTYPE_V2SI_V2SI_COUNT, - V2SI_FTYPE_V4HI_V4HI, - V2SI_FTYPE_V2SF_V2SF, - V2SI_FTYPE_V2SI_SI_COUNT, - V2DF_FTYPE_V2DF_V2DF, - V2DF_FTYPE_V2DF_V2DF_SWAP, - V2DF_FTYPE_V2DF_V4SF, - V2DF_FTYPE_V2DF_V2DI, - V2DF_FTYPE_V2DF_DI, - V2DF_FTYPE_V2DF_SI, - V2SF_FTYPE_V2SF_V2SF, - V1DI_FTYPE_V1DI_V1DI, - V1DI_FTYPE_V1DI_V1DI_COUNT, - V1DI_FTYPE_V8QI_V8QI, - V1DI_FTYPE_V2SI_V2SI, - V1DI_FTYPE_V1DI_SI_COUNT, - UINT64_FTYPE_UINT64_UINT64, - UINT_FTYPE_UINT_UINT, - UINT_FTYPE_UINT_USHORT, - UINT_FTYPE_UINT_UCHAR, - V8HI_FTYPE_V8HI_INT, - V4SI_FTYPE_V4SI_INT, - V4HI_FTYPE_V4HI_INT, - V8SF_FTYPE_V8SF_INT, - V4SI_FTYPE_V8SI_INT, - V4SF_FTYPE_V8SF_INT, - V2DF_FTYPE_V4DF_INT, - V4DF_FTYPE_V4DF_INT, - V4SF_FTYPE_V4SF_INT, - V2DI_FTYPE_V2DI_INT, - V2DI2TI_FTYPE_V2DI_INT, - V2DF_FTYPE_V2DF_INT, - V16QI_FTYPE_V16QI_V16QI_V16QI, - V8SF_FTYPE_V8SF_V8SF_V8SF, - V4DF_FTYPE_V4DF_V4DF_V4DF, - V4SF_FTYPE_V4SF_V4SF_V4SF, - V2DF_FTYPE_V2DF_V2DF_V2DF, - V16QI_FTYPE_V16QI_V16QI_INT, - V8SI_FTYPE_V8SI_V8SI_INT, - V8SI_FTYPE_V8SI_V4SI_INT, - V8HI_FTYPE_V8HI_V8HI_INT, - V8SF_FTYPE_V8SF_V8SF_INT, - V8SF_FTYPE_V8SF_V4SF_INT, - V4SI_FTYPE_V4SI_V4SI_INT, - V4DF_FTYPE_V4DF_V4DF_INT, - V4DF_FTYPE_V4DF_V2DF_INT, - V4SF_FTYPE_V4SF_V4SF_INT, - V2DI_FTYPE_V2DI_V2DI_INT, - V2DI2TI_FTYPE_V2DI_V2DI_INT, - V1DI2DI_FTYPE_V1DI_V1DI_INT, - V2DF_FTYPE_V2DF_V2DF_INT, - V2DI_FTYPE_V2DI_UINT_UINT, - V2DI_FTYPE_V2DI_V2DI_UINT_UINT -}; - -/* Special builtins with variable number of arguments. */ -static const struct builtin_description bdesc_special_args[] = -{ - /* MMX */ - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, - - /* 3DNow! */ - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, - - /* SSE */ - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, - - /* SSE or 3DNow!A */ - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI }, - - /* SSE2 */ - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, - - /* SSE3 */ - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, - - /* SSE4.1 */ - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI }, - - /* SSE4A */ - { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, - { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, - - /* AVX */ - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID }, - { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, - - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, - { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, - -}; - -/* Builtins with variable number of arguments. */ -static const struct builtin_description bdesc_args[] = -{ - /* MMX */ - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI}, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI}, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, - - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, - { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, - - /* 3DNow! */ - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF }, - - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - - /* 3DNow!A */ - { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF }, - { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI }, - { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI }, - { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF }, - { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, - - /* SSE */ - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP}, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, - - /* SSE MMX or 3Dnow!A */ - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI }, - - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT }, - - /* SSE2 */ - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, - { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP}, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI }, - { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, - - { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, - - /* SSE2 MMX */ - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, - - /* SSE3 */ - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF}, - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF }, - - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - - /* SSSE3 */ - { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI }, - - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, - - /* SSSE3. */ - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT }, - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT }, - - /* SSE4.1 */ - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT }, - - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, - - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, - { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, - - /* SSE4.1 and SSE5 */ - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, - { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, - - /* SSE4.2 */ - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT }, - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, - { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, - - /* SSE4A */ - { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT }, - { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI }, - { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT }, - { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - - /* AES */ - { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, - - { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, - - /* PCLMUL */ - { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT }, - - /* AVX */ - { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, - - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, -}; - -/* SSE5 */ -enum multi_arg_type { - MULTI_ARG_UNKNOWN, - MULTI_ARG_3_SF, - MULTI_ARG_3_DF, - MULTI_ARG_3_DI, - MULTI_ARG_3_SI, - MULTI_ARG_3_SI_DI, - MULTI_ARG_3_HI, - MULTI_ARG_3_HI_SI, - MULTI_ARG_3_QI, - MULTI_ARG_3_PERMPS, - MULTI_ARG_3_PERMPD, - MULTI_ARG_2_SF, - MULTI_ARG_2_DF, - MULTI_ARG_2_DI, - MULTI_ARG_2_SI, - MULTI_ARG_2_HI, - MULTI_ARG_2_QI, - MULTI_ARG_2_DI_IMM, - MULTI_ARG_2_SI_IMM, - MULTI_ARG_2_HI_IMM, - MULTI_ARG_2_QI_IMM, - MULTI_ARG_2_SF_CMP, - MULTI_ARG_2_DF_CMP, - MULTI_ARG_2_DI_CMP, - MULTI_ARG_2_SI_CMP, - MULTI_ARG_2_HI_CMP, - MULTI_ARG_2_QI_CMP, - MULTI_ARG_2_DI_TF, - MULTI_ARG_2_SI_TF, - MULTI_ARG_2_HI_TF, - MULTI_ARG_2_QI_TF, - MULTI_ARG_2_SF_TF, - MULTI_ARG_2_DF_TF, - MULTI_ARG_1_SF, - MULTI_ARG_1_DF, - MULTI_ARG_1_DI, - MULTI_ARG_1_SI, - MULTI_ARG_1_HI, - MULTI_ARG_1_QI, - MULTI_ARG_1_SI_DI, - MULTI_ARG_1_HI_DI, - MULTI_ARG_1_HI_SI, - MULTI_ARG_1_QI_DI, - MULTI_ARG_1_QI_SI, - MULTI_ARG_1_QI_HI, - MULTI_ARG_1_PH2PS, - MULTI_ARG_1_PS2PH -}; - -static const struct builtin_description bdesc_multi_arg[] = -{ - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, - - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, -}; - -/* Set up all the MMX/SSE builtins, even builtins for instructions that are not - in the current target ISA to allow the user to compile particular modules - with different target specific options that differ from the command line - options. */ -static void -ix86_init_mmx_sse_builtins (void) -{ - const struct builtin_description * d; - size_t i; - - tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); - tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); - tree V1DI_type_node - = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); - tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); - tree V2DI_type_node - = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); - tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); - tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); - tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); - tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); - tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode); - tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); - - tree pchar_type_node = build_pointer_type (char_type_node); - tree pcchar_type_node - = build_pointer_type (build_type_variant (char_type_node, 1, 0)); - tree pfloat_type_node = build_pointer_type (float_type_node); - tree pcfloat_type_node - = build_pointer_type (build_type_variant (float_type_node, 1, 0)); - tree pv2sf_type_node = build_pointer_type (V2SF_type_node); - tree pcv2sf_type_node - = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0)); - tree pv2di_type_node = build_pointer_type (V2DI_type_node); - tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); - - /* Comparisons. */ - tree int_ftype_v4sf_v4sf - = build_function_type_list (integer_type_node, - V4SF_type_node, V4SF_type_node, NULL_TREE); - tree v4si_ftype_v4sf_v4sf - = build_function_type_list (V4SI_type_node, - V4SF_type_node, V4SF_type_node, NULL_TREE); - /* MMX/SSE/integer conversions. */ - tree int_ftype_v4sf - = build_function_type_list (integer_type_node, - V4SF_type_node, NULL_TREE); - tree int64_ftype_v4sf - = build_function_type_list (long_long_integer_type_node, - V4SF_type_node, NULL_TREE); - tree int_ftype_v8qi - = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_int - = build_function_type_list (V4SF_type_node, - V4SF_type_node, integer_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_int64 - = build_function_type_list (V4SF_type_node, - V4SF_type_node, long_long_integer_type_node, - NULL_TREE); - tree v4sf_ftype_v4sf_v2si - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V2SI_type_node, NULL_TREE); - - /* Miscellaneous. */ - tree v8qi_ftype_v4hi_v4hi - = build_function_type_list (V8QI_type_node, - V4HI_type_node, V4HI_type_node, NULL_TREE); - tree v4hi_ftype_v2si_v2si - = build_function_type_list (V4HI_type_node, - V2SI_type_node, V2SI_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf_int - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, - integer_type_node, NULL_TREE); - tree v2si_ftype_v4hi_v4hi - = build_function_type_list (V2SI_type_node, - V4HI_type_node, V4HI_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_int - = build_function_type_list (V4HI_type_node, - V4HI_type_node, integer_type_node, NULL_TREE); - tree v2si_ftype_v2si_int - = build_function_type_list (V2SI_type_node, - V2SI_type_node, integer_type_node, NULL_TREE); - tree v1di_ftype_v1di_int - = build_function_type_list (V1DI_type_node, - V1DI_type_node, integer_type_node, NULL_TREE); - - tree void_ftype_void - = build_function_type (void_type_node, void_list_node); - tree void_ftype_unsigned - = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); - tree void_ftype_unsigned_unsigned - = build_function_type_list (void_type_node, unsigned_type_node, - unsigned_type_node, NULL_TREE); - tree void_ftype_pcvoid_unsigned_unsigned - = build_function_type_list (void_type_node, const_ptr_type_node, - unsigned_type_node, unsigned_type_node, - NULL_TREE); - tree unsigned_ftype_void - = build_function_type (unsigned_type_node, void_list_node); - - tree v2si_ftype_v4sf - = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); - /* Loads/stores. */ - tree void_ftype_v8qi_v8qi_pchar - = build_function_type_list (void_type_node, - V8QI_type_node, V8QI_type_node, - pchar_type_node, NULL_TREE); - tree v4sf_ftype_pcfloat - = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_pcv2sf - = build_function_type_list (V4SF_type_node, - V4SF_type_node, pcv2sf_type_node, NULL_TREE); - tree void_ftype_pv2sf_v4sf - = build_function_type_list (void_type_node, - pv2sf_type_node, V4SF_type_node, NULL_TREE); - tree void_ftype_pfloat_v4sf - = build_function_type_list (void_type_node, - pfloat_type_node, V4SF_type_node, NULL_TREE); - tree void_ftype_pdi_di - = build_function_type_list (void_type_node, - pdi_type_node, long_long_unsigned_type_node, - NULL_TREE); - tree void_ftype_pv2di_v2di - = build_function_type_list (void_type_node, - pv2di_type_node, V2DI_type_node, NULL_TREE); - /* Normal vector unops. */ - tree v4sf_ftype_v4sf - = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); - tree v16qi_ftype_v16qi - = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); - tree v8hi_ftype_v8hi - = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); - tree v4si_ftype_v4si - = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); - tree v8qi_ftype_v8qi - = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); - tree v4hi_ftype_v4hi - = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); - - /* Normal vector binops. */ - tree v4sf_ftype_v4sf_v4sf - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, NULL_TREE); - tree v8qi_ftype_v8qi_v8qi - = build_function_type_list (V8QI_type_node, - V8QI_type_node, V8QI_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_v4hi - = build_function_type_list (V4HI_type_node, - V4HI_type_node, V4HI_type_node, NULL_TREE); - tree v2si_ftype_v2si_v2si - = build_function_type_list (V2SI_type_node, - V2SI_type_node, V2SI_type_node, NULL_TREE); - tree v1di_ftype_v1di_v1di - = build_function_type_list (V1DI_type_node, - V1DI_type_node, V1DI_type_node, NULL_TREE); - tree v1di_ftype_v1di_v1di_int - = build_function_type_list (V1DI_type_node, - V1DI_type_node, V1DI_type_node, - integer_type_node, NULL_TREE); - tree v2si_ftype_v2sf - = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); - tree v2sf_ftype_v2si - = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); - tree v2si_ftype_v2si - = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); - tree v2sf_ftype_v2sf - = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); - tree v2sf_ftype_v2sf_v2sf - = build_function_type_list (V2SF_type_node, - V2SF_type_node, V2SF_type_node, NULL_TREE); - tree v2si_ftype_v2sf_v2sf - = build_function_type_list (V2SI_type_node, - V2SF_type_node, V2SF_type_node, NULL_TREE); - tree pint_type_node = build_pointer_type (integer_type_node); - tree pdouble_type_node = build_pointer_type (double_type_node); - tree pcdouble_type_node = build_pointer_type ( - build_type_variant (double_type_node, 1, 0)); - tree int_ftype_v2df_v2df - = build_function_type_list (integer_type_node, - V2DF_type_node, V2DF_type_node, NULL_TREE); - - tree void_ftype_pcvoid - = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); - tree v4sf_ftype_v4si - = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); - tree v4si_ftype_v4sf - = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); - tree v2df_ftype_v4si - = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); - tree v4si_ftype_v2df - = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); - tree v4si_ftype_v2df_v2df - = build_function_type_list (V4SI_type_node, - V2DF_type_node, V2DF_type_node, NULL_TREE); - tree v2si_ftype_v2df - = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); - tree v4sf_ftype_v2df - = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); - tree v2df_ftype_v2si - = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); - tree v2df_ftype_v4sf - = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); - tree int_ftype_v2df - = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); - tree int64_ftype_v2df - = build_function_type_list (long_long_integer_type_node, - V2DF_type_node, NULL_TREE); - tree v2df_ftype_v2df_int - = build_function_type_list (V2DF_type_node, - V2DF_type_node, integer_type_node, NULL_TREE); - tree v2df_ftype_v2df_int64 - = build_function_type_list (V2DF_type_node, - V2DF_type_node, long_long_integer_type_node, - NULL_TREE); - tree v4sf_ftype_v4sf_v2df - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V2DF_type_node, NULL_TREE); - tree v2df_ftype_v2df_v4sf - = build_function_type_list (V2DF_type_node, - V2DF_type_node, V4SF_type_node, NULL_TREE); - tree v2df_ftype_v2df_v2df_int - = build_function_type_list (V2DF_type_node, - V2DF_type_node, V2DF_type_node, - integer_type_node, - NULL_TREE); - tree v2df_ftype_v2df_pcdouble - = build_function_type_list (V2DF_type_node, - V2DF_type_node, pcdouble_type_node, NULL_TREE); - tree void_ftype_pdouble_v2df - = build_function_type_list (void_type_node, - pdouble_type_node, V2DF_type_node, NULL_TREE); - tree void_ftype_pint_int - = build_function_type_list (void_type_node, - pint_type_node, integer_type_node, NULL_TREE); - tree void_ftype_v16qi_v16qi_pchar - = build_function_type_list (void_type_node, - V16QI_type_node, V16QI_type_node, - pchar_type_node, NULL_TREE); - tree v2df_ftype_pcdouble - = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); - tree v2df_ftype_v2df_v2df - = build_function_type_list (V2DF_type_node, - V2DF_type_node, V2DF_type_node, NULL_TREE); - tree v16qi_ftype_v16qi_v16qi - = build_function_type_list (V16QI_type_node, - V16QI_type_node, V16QI_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_v8hi - = build_function_type_list (V8HI_type_node, - V8HI_type_node, V8HI_type_node, NULL_TREE); - tree v4si_ftype_v4si_v4si - = build_function_type_list (V4SI_type_node, - V4SI_type_node, V4SI_type_node, NULL_TREE); - tree v2di_ftype_v2di_v2di - = build_function_type_list (V2DI_type_node, - V2DI_type_node, V2DI_type_node, NULL_TREE); - tree v2di_ftype_v2df_v2df - = build_function_type_list (V2DI_type_node, - V2DF_type_node, V2DF_type_node, NULL_TREE); - tree v2df_ftype_v2df - = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); - tree v2di_ftype_v2di_int - = build_function_type_list (V2DI_type_node, - V2DI_type_node, integer_type_node, NULL_TREE); - tree v2di_ftype_v2di_v2di_int - = build_function_type_list (V2DI_type_node, V2DI_type_node, - V2DI_type_node, integer_type_node, NULL_TREE); - tree v4si_ftype_v4si_int - = build_function_type_list (V4SI_type_node, - V4SI_type_node, integer_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_int - = build_function_type_list (V8HI_type_node, - V8HI_type_node, integer_type_node, NULL_TREE); - tree v4si_ftype_v8hi_v8hi - = build_function_type_list (V4SI_type_node, - V8HI_type_node, V8HI_type_node, NULL_TREE); - tree v1di_ftype_v8qi_v8qi - = build_function_type_list (V1DI_type_node, - V8QI_type_node, V8QI_type_node, NULL_TREE); - tree v1di_ftype_v2si_v2si - = build_function_type_list (V1DI_type_node, - V2SI_type_node, V2SI_type_node, NULL_TREE); - tree v2di_ftype_v16qi_v16qi - = build_function_type_list (V2DI_type_node, - V16QI_type_node, V16QI_type_node, NULL_TREE); - tree v2di_ftype_v4si_v4si - = build_function_type_list (V2DI_type_node, - V4SI_type_node, V4SI_type_node, NULL_TREE); - tree int_ftype_v16qi - = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); - tree v16qi_ftype_pcchar - = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); - tree void_ftype_pchar_v16qi - = build_function_type_list (void_type_node, - pchar_type_node, V16QI_type_node, NULL_TREE); - - tree v2di_ftype_v2di_unsigned_unsigned - = build_function_type_list (V2DI_type_node, V2DI_type_node, - unsigned_type_node, unsigned_type_node, - NULL_TREE); - tree v2di_ftype_v2di_v2di_unsigned_unsigned - = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, - unsigned_type_node, unsigned_type_node, - NULL_TREE); - tree v2di_ftype_v2di_v16qi - = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, - NULL_TREE); - tree v2df_ftype_v2df_v2df_v2df - = build_function_type_list (V2DF_type_node, - V2DF_type_node, V2DF_type_node, - V2DF_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf_v4sf - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, - V4SF_type_node, NULL_TREE); - tree v8hi_ftype_v16qi - = build_function_type_list (V8HI_type_node, V16QI_type_node, - NULL_TREE); - tree v4si_ftype_v16qi - = build_function_type_list (V4SI_type_node, V16QI_type_node, - NULL_TREE); - tree v2di_ftype_v16qi - = build_function_type_list (V2DI_type_node, V16QI_type_node, - NULL_TREE); - tree v4si_ftype_v8hi - = build_function_type_list (V4SI_type_node, V8HI_type_node, - NULL_TREE); - tree v2di_ftype_v8hi - = build_function_type_list (V2DI_type_node, V8HI_type_node, - NULL_TREE); - tree v2di_ftype_v4si - = build_function_type_list (V2DI_type_node, V4SI_type_node, - NULL_TREE); - tree v2di_ftype_pv2di - = build_function_type_list (V2DI_type_node, pv2di_type_node, - NULL_TREE); - tree v16qi_ftype_v16qi_v16qi_int - = build_function_type_list (V16QI_type_node, V16QI_type_node, - V16QI_type_node, integer_type_node, - NULL_TREE); - tree v16qi_ftype_v16qi_v16qi_v16qi - = build_function_type_list (V16QI_type_node, V16QI_type_node, - V16QI_type_node, V16QI_type_node, - NULL_TREE); - tree v8hi_ftype_v8hi_v8hi_int - = build_function_type_list (V8HI_type_node, V8HI_type_node, - V8HI_type_node, integer_type_node, - NULL_TREE); - tree v4si_ftype_v4si_v4si_int - = build_function_type_list (V4SI_type_node, V4SI_type_node, - V4SI_type_node, integer_type_node, - NULL_TREE); - tree int_ftype_v2di_v2di - = build_function_type_list (integer_type_node, - V2DI_type_node, V2DI_type_node, - NULL_TREE); - tree int_ftype_v16qi_int_v16qi_int_int - = build_function_type_list (integer_type_node, - V16QI_type_node, - integer_type_node, - V16QI_type_node, - integer_type_node, - integer_type_node, - NULL_TREE); - tree v16qi_ftype_v16qi_int_v16qi_int_int - = build_function_type_list (V16QI_type_node, - V16QI_type_node, - integer_type_node, - V16QI_type_node, - integer_type_node, - integer_type_node, - NULL_TREE); - tree int_ftype_v16qi_v16qi_int - = build_function_type_list (integer_type_node, - V16QI_type_node, - V16QI_type_node, - integer_type_node, - NULL_TREE); - - /* SSE5 instructions */ - tree v2di_ftype_v2di_v2di_v2di - = build_function_type_list (V2DI_type_node, - V2DI_type_node, - V2DI_type_node, - V2DI_type_node, - NULL_TREE); - - tree v4si_ftype_v4si_v4si_v4si - = build_function_type_list (V4SI_type_node, - V4SI_type_node, - V4SI_type_node, - V4SI_type_node, - NULL_TREE); - - tree v4si_ftype_v4si_v4si_v2di - = build_function_type_list (V4SI_type_node, - V4SI_type_node, - V4SI_type_node, - V2DI_type_node, - NULL_TREE); - - tree v8hi_ftype_v8hi_v8hi_v8hi - = build_function_type_list (V8HI_type_node, - V8HI_type_node, - V8HI_type_node, - V8HI_type_node, - NULL_TREE); - - tree v8hi_ftype_v8hi_v8hi_v4si - = build_function_type_list (V8HI_type_node, - V8HI_type_node, - V8HI_type_node, - V4SI_type_node, - NULL_TREE); - - tree v2df_ftype_v2df_v2df_v16qi - = build_function_type_list (V2DF_type_node, - V2DF_type_node, - V2DF_type_node, - V16QI_type_node, - NULL_TREE); - - tree v4sf_ftype_v4sf_v4sf_v16qi - = build_function_type_list (V4SF_type_node, - V4SF_type_node, - V4SF_type_node, - V16QI_type_node, - NULL_TREE); - - tree v2di_ftype_v2di_si - = build_function_type_list (V2DI_type_node, - V2DI_type_node, - integer_type_node, - NULL_TREE); - - tree v4si_ftype_v4si_si - = build_function_type_list (V4SI_type_node, - V4SI_type_node, - integer_type_node, - NULL_TREE); - - tree v8hi_ftype_v8hi_si - = build_function_type_list (V8HI_type_node, - V8HI_type_node, - integer_type_node, - NULL_TREE); - - tree v16qi_ftype_v16qi_si - = build_function_type_list (V16QI_type_node, - V16QI_type_node, - integer_type_node, - NULL_TREE); - tree v4sf_ftype_v4hi - = build_function_type_list (V4SF_type_node, - V4HI_type_node, - NULL_TREE); - - tree v4hi_ftype_v4sf - = build_function_type_list (V4HI_type_node, - V4SF_type_node, - NULL_TREE); - - tree v2di_ftype_v2di - = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); - - tree v16qi_ftype_v8hi_v8hi - = build_function_type_list (V16QI_type_node, - V8HI_type_node, V8HI_type_node, - NULL_TREE); - tree v8hi_ftype_v4si_v4si - = build_function_type_list (V8HI_type_node, - V4SI_type_node, V4SI_type_node, - NULL_TREE); - tree v8hi_ftype_v16qi_v16qi - = build_function_type_list (V8HI_type_node, - V16QI_type_node, V16QI_type_node, - NULL_TREE); - tree v4hi_ftype_v8qi_v8qi - = build_function_type_list (V4HI_type_node, - V8QI_type_node, V8QI_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_uchar - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_char_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_ushort - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - short_unsigned_type_node, - NULL_TREE); - tree unsigned_ftype_unsigned_unsigned - = build_function_type_list (unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - tree uint64_ftype_uint64_uint64 - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, - NULL_TREE); - tree float_ftype_float - = build_function_type_list (float_type_node, - float_type_node, - NULL_TREE); - - /* AVX builtins */ - tree V32QI_type_node = build_vector_type_for_mode (char_type_node, - V32QImode); - tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node, - V8SImode); - tree V8SF_type_node = build_vector_type_for_mode (float_type_node, - V8SFmode); - tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, - V4DImode); - tree V4DF_type_node = build_vector_type_for_mode (double_type_node, - V4DFmode); - tree v8sf_ftype_v8sf - = build_function_type_list (V8SF_type_node, - V8SF_type_node, - NULL_TREE); - tree v8si_ftype_v8sf - = build_function_type_list (V8SI_type_node, - V8SF_type_node, - NULL_TREE); - tree v8sf_ftype_v8si - = build_function_type_list (V8SF_type_node, - V8SI_type_node, - NULL_TREE); - tree v4si_ftype_v4df - = build_function_type_list (V4SI_type_node, - V4DF_type_node, - NULL_TREE); - tree v4df_ftype_v4df - = build_function_type_list (V4DF_type_node, - V4DF_type_node, - NULL_TREE); - tree v4df_ftype_v4si - = build_function_type_list (V4DF_type_node, - V4SI_type_node, - NULL_TREE); - tree v4df_ftype_v4sf - = build_function_type_list (V4DF_type_node, - V4SF_type_node, - NULL_TREE); - tree v4sf_ftype_v4df - = build_function_type_list (V4SF_type_node, - V4DF_type_node, - NULL_TREE); - tree v8sf_ftype_v8sf_v8sf - = build_function_type_list (V8SF_type_node, - V8SF_type_node, V8SF_type_node, - NULL_TREE); - tree v4df_ftype_v4df_v4df - = build_function_type_list (V4DF_type_node, - V4DF_type_node, V4DF_type_node, - NULL_TREE); - tree v8sf_ftype_v8sf_int - = build_function_type_list (V8SF_type_node, - V8SF_type_node, integer_type_node, - NULL_TREE); - tree v4si_ftype_v8si_int - = build_function_type_list (V4SI_type_node, - V8SI_type_node, integer_type_node, - NULL_TREE); - tree v4df_ftype_v4df_int - = build_function_type_list (V4DF_type_node, - V4DF_type_node, integer_type_node, - NULL_TREE); - tree v4sf_ftype_v8sf_int - = build_function_type_list (V4SF_type_node, - V8SF_type_node, integer_type_node, - NULL_TREE); - tree v2df_ftype_v4df_int - = build_function_type_list (V2DF_type_node, - V4DF_type_node, integer_type_node, - NULL_TREE); - tree v8sf_ftype_v8sf_v8sf_int - = build_function_type_list (V8SF_type_node, - V8SF_type_node, V8SF_type_node, - integer_type_node, - NULL_TREE); - tree v8sf_ftype_v8sf_v8sf_v8sf - = build_function_type_list (V8SF_type_node, - V8SF_type_node, V8SF_type_node, - V8SF_type_node, - NULL_TREE); - tree v4df_ftype_v4df_v4df_v4df - = build_function_type_list (V4DF_type_node, - V4DF_type_node, V4DF_type_node, - V4DF_type_node, - NULL_TREE); - tree v8si_ftype_v8si_v8si_int - = build_function_type_list (V8SI_type_node, - V8SI_type_node, V8SI_type_node, - integer_type_node, - NULL_TREE); - tree v4df_ftype_v4df_v4df_int - = build_function_type_list (V4DF_type_node, - V4DF_type_node, V4DF_type_node, - integer_type_node, - NULL_TREE); - tree v8sf_ftype_pcfloat - = build_function_type_list (V8SF_type_node, - pcfloat_type_node, - NULL_TREE); - tree v4df_ftype_pcdouble - = build_function_type_list (V4DF_type_node, - pcdouble_type_node, - NULL_TREE); - tree pcv4sf_type_node - = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0)); - tree pcv2df_type_node - = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0)); - tree v8sf_ftype_pcv4sf - = build_function_type_list (V8SF_type_node, - pcv4sf_type_node, - NULL_TREE); - tree v4df_ftype_pcv2df - = build_function_type_list (V4DF_type_node, - pcv2df_type_node, - NULL_TREE); - tree v32qi_ftype_pcchar - = build_function_type_list (V32QI_type_node, - pcchar_type_node, - NULL_TREE); - tree void_ftype_pchar_v32qi - = build_function_type_list (void_type_node, - pchar_type_node, V32QI_type_node, - NULL_TREE); - tree v8si_ftype_v8si_v4si_int - = build_function_type_list (V8SI_type_node, - V8SI_type_node, V4SI_type_node, - integer_type_node, - NULL_TREE); - tree pv4di_type_node = build_pointer_type (V4DI_type_node); - tree void_ftype_pv4di_v4di - = build_function_type_list (void_type_node, - pv4di_type_node, V4DI_type_node, - NULL_TREE); - tree v8sf_ftype_v8sf_v4sf_int - = build_function_type_list (V8SF_type_node, - V8SF_type_node, V4SF_type_node, - integer_type_node, - NULL_TREE); - tree v4df_ftype_v4df_v2df_int - = build_function_type_list (V4DF_type_node, - V4DF_type_node, V2DF_type_node, - integer_type_node, - NULL_TREE); - tree void_ftype_pfloat_v8sf - = build_function_type_list (void_type_node, - pfloat_type_node, V8SF_type_node, - NULL_TREE); - tree void_ftype_pdouble_v4df - = build_function_type_list (void_type_node, - pdouble_type_node, V4DF_type_node, - NULL_TREE); - tree pv8sf_type_node = build_pointer_type (V8SF_type_node); - tree pv4sf_type_node = build_pointer_type (V4SF_type_node); - tree pv4df_type_node = build_pointer_type (V4DF_type_node); - tree pv2df_type_node = build_pointer_type (V2DF_type_node); - tree pcv8sf_type_node - = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0)); - tree pcv4df_type_node - = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0)); - tree v8sf_ftype_pcv8sf_v8sf - = build_function_type_list (V8SF_type_node, - pcv8sf_type_node, V8SF_type_node, - NULL_TREE); - tree v4df_ftype_pcv4df_v4df - = build_function_type_list (V4DF_type_node, - pcv4df_type_node, V4DF_type_node, - NULL_TREE); - tree v4sf_ftype_pcv4sf_v4sf - = build_function_type_list (V4SF_type_node, - pcv4sf_type_node, V4SF_type_node, - NULL_TREE); - tree v2df_ftype_pcv2df_v2df - = build_function_type_list (V2DF_type_node, - pcv2df_type_node, V2DF_type_node, - NULL_TREE); - tree void_ftype_pv8sf_v8sf_v8sf - = build_function_type_list (void_type_node, - pv8sf_type_node, V8SF_type_node, - V8SF_type_node, - NULL_TREE); - tree void_ftype_pv4df_v4df_v4df - = build_function_type_list (void_type_node, - pv4df_type_node, V4DF_type_node, - V4DF_type_node, - NULL_TREE); - tree void_ftype_pv4sf_v4sf_v4sf - = build_function_type_list (void_type_node, - pv4sf_type_node, V4SF_type_node, - V4SF_type_node, - NULL_TREE); - tree void_ftype_pv2df_v2df_v2df - = build_function_type_list (void_type_node, - pv2df_type_node, V2DF_type_node, - V2DF_type_node, - NULL_TREE); - tree v4df_ftype_v2df - = build_function_type_list (V4DF_type_node, - V2DF_type_node, - NULL_TREE); - tree v8sf_ftype_v4sf - = build_function_type_list (V8SF_type_node, - V4SF_type_node, - NULL_TREE); - tree v8si_ftype_v4si - = build_function_type_list (V8SI_type_node, - V4SI_type_node, - NULL_TREE); - tree v2df_ftype_v4df - = build_function_type_list (V2DF_type_node, - V4DF_type_node, - NULL_TREE); - tree v4sf_ftype_v8sf - = build_function_type_list (V4SF_type_node, - V8SF_type_node, - NULL_TREE); - tree v4si_ftype_v8si - = build_function_type_list (V4SI_type_node, - V8SI_type_node, - NULL_TREE); - tree int_ftype_v4df - = build_function_type_list (integer_type_node, - V4DF_type_node, - NULL_TREE); - tree int_ftype_v8sf - = build_function_type_list (integer_type_node, - V8SF_type_node, - NULL_TREE); - tree int_ftype_v8sf_v8sf - = build_function_type_list (integer_type_node, - V8SF_type_node, V8SF_type_node, - NULL_TREE); - tree int_ftype_v4di_v4di - = build_function_type_list (integer_type_node, - V4DI_type_node, V4DI_type_node, - NULL_TREE); - tree int_ftype_v4df_v4df - = build_function_type_list (integer_type_node, - V4DF_type_node, V4DF_type_node, - NULL_TREE); - tree v8sf_ftype_v8sf_v8si - = build_function_type_list (V8SF_type_node, - V8SF_type_node, V8SI_type_node, - NULL_TREE); - tree v4df_ftype_v4df_v4di - = build_function_type_list (V4DF_type_node, - V4DF_type_node, V4DI_type_node, - NULL_TREE); - tree v4sf_ftype_v4sf_v4si - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SI_type_node, NULL_TREE); - tree v2df_ftype_v2df_v2di - = build_function_type_list (V2DF_type_node, - V2DF_type_node, V2DI_type_node, NULL_TREE); - - /* LWP instructions. */ - - tree pvoid_ftype_void - = build_function_type (ptr_type_node, void_list_node); - - tree void_ftype_pvoid - = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); - - tree void_ftype_ushort_unsigned_ushort - = build_function_type_list (void_type_node, - short_unsigned_type_node, - unsigned_type_node, - short_unsigned_type_node, - NULL_TREE); - - tree void_ftype_unsigned_unsigned_unsigned - = build_function_type_list (void_type_node, - unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - - tree void_ftype_uint64_unsigned_unsigned - = build_function_type_list (void_type_node, - long_long_unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - - tree uchar_ftype_ushort_unsigned_ushort - = build_function_type_list (unsigned_char_type_node, - short_unsigned_type_node, - unsigned_type_node, - short_unsigned_type_node, - NULL_TREE); - - tree uchar_ftype_unsigned_unsigned_unsigned - = build_function_type_list (unsigned_char_type_node, - unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - - tree uchar_ftype_uint64_unsigned_unsigned - = build_function_type_list (unsigned_char_type_node, - long_long_unsigned_type_node, - unsigned_type_node, - unsigned_type_node, - NULL_TREE); - - tree ftype; - - /* Add all special builtins with variable number of operands. */ - for (i = 0, d = bdesc_special_args; - i < ARRAY_SIZE (bdesc_special_args); - i++, d++) - { - tree type; - - if (d->name == 0) - continue; - - switch ((enum ix86_special_builtin_type) d->flag) - { - case VOID_FTYPE_VOID: - type = void_ftype_void; - break; - case V32QI_FTYPE_PCCHAR: - type = v32qi_ftype_pcchar; - break; - case V16QI_FTYPE_PCCHAR: - type = v16qi_ftype_pcchar; - break; - case V8SF_FTYPE_PCV4SF: - type = v8sf_ftype_pcv4sf; - break; - case V8SF_FTYPE_PCFLOAT: - type = v8sf_ftype_pcfloat; - break; - case V4DF_FTYPE_PCV2DF: - type = v4df_ftype_pcv2df; - break; - case V4DF_FTYPE_PCDOUBLE: - type = v4df_ftype_pcdouble; - break; - case V4SF_FTYPE_PCFLOAT: - type = v4sf_ftype_pcfloat; - break; - case V2DI_FTYPE_PV2DI: - type = v2di_ftype_pv2di; - break; - case V2DF_FTYPE_PCDOUBLE: - type = v2df_ftype_pcdouble; - break; - case V8SF_FTYPE_PCV8SF_V8SF: - type = v8sf_ftype_pcv8sf_v8sf; - break; - case V4DF_FTYPE_PCV4DF_V4DF: - type = v4df_ftype_pcv4df_v4df; - break; - case V4SF_FTYPE_V4SF_PCV2SF: - type = v4sf_ftype_v4sf_pcv2sf; - break; - case V4SF_FTYPE_PCV4SF_V4SF: - type = v4sf_ftype_pcv4sf_v4sf; - break; - case V2DF_FTYPE_V2DF_PCDOUBLE: - type = v2df_ftype_v2df_pcdouble; - break; - case V2DF_FTYPE_PCV2DF_V2DF: - type = v2df_ftype_pcv2df_v2df; - break; - case VOID_FTYPE_PV2SF_V4SF: - type = void_ftype_pv2sf_v4sf; - break; - case VOID_FTYPE_PV4DI_V4DI: - type = void_ftype_pv4di_v4di; - break; - case VOID_FTYPE_PV2DI_V2DI: - type = void_ftype_pv2di_v2di; - break; - case VOID_FTYPE_PCHAR_V32QI: - type = void_ftype_pchar_v32qi; - break; - case VOID_FTYPE_PCHAR_V16QI: - type = void_ftype_pchar_v16qi; - break; - case VOID_FTYPE_PFLOAT_V8SF: - type = void_ftype_pfloat_v8sf; - break; - case VOID_FTYPE_PFLOAT_V4SF: - type = void_ftype_pfloat_v4sf; - break; - case VOID_FTYPE_PDOUBLE_V4DF: - type = void_ftype_pdouble_v4df; - break; - case VOID_FTYPE_PDOUBLE_V2DF: - type = void_ftype_pdouble_v2df; - break; - case VOID_FTYPE_PDI_DI: - type = void_ftype_pdi_di; - break; - case VOID_FTYPE_PINT_INT: - type = void_ftype_pint_int; - break; - case VOID_FTYPE_PV8SF_V8SF_V8SF: - type = void_ftype_pv8sf_v8sf_v8sf; - break; - case VOID_FTYPE_PV4DF_V4DF_V4DF: - type = void_ftype_pv4df_v4df_v4df; - break; - case VOID_FTYPE_PV4SF_V4SF_V4SF: - type = void_ftype_pv4sf_v4sf_v4sf; - break; - case VOID_FTYPE_PV2DF_V2DF_V2DF: - type = void_ftype_pv2df_v2df_v2df; - break; - case VOID_FTYPE_USHORT_UINT_USHORT: - type = void_ftype_ushort_unsigned_ushort; - break; - case VOID_FTYPE_UINT_UINT_UINT: - type = void_ftype_unsigned_unsigned_unsigned; - break; - case VOID_FTYPE_UINT64_UINT_UINT: - type = void_ftype_uint64_unsigned_unsigned; - break; - case VOID_FTYPE_PVOID: - type = void_ftype_pvoid; - break; - case PVOID_FTYPE_VOID: - type = pvoid_ftype_void; - break; - case UCHAR_FTYPE_USHORT_UINT_USHORT: - type = uchar_ftype_ushort_unsigned_ushort; - break; - case UCHAR_FTYPE_UINT_UINT_UINT: - type = uchar_ftype_unsigned_unsigned_unsigned; - break; - case UCHAR_FTYPE_UINT64_UINT_UINT: - type = uchar_ftype_uint64_unsigned_unsigned; - break; - - default: - gcc_unreachable (); - } - - def_builtin (d->mask, d->name, type, d->code); - } - - /* Add all builtins with variable number of operands. */ - for (i = 0, d = bdesc_args; - i < ARRAY_SIZE (bdesc_args); - i++, d++) - { - tree type; - - if (d->name == 0) - continue; - - switch ((enum ix86_builtin_type) d->flag) - { - case FLOAT_FTYPE_FLOAT: - type = float_ftype_float; - break; - case INT_FTYPE_V8SF_V8SF_PTEST: - type = int_ftype_v8sf_v8sf; - break; - case INT_FTYPE_V4DI_V4DI_PTEST: - type = int_ftype_v4di_v4di; - break; - case INT_FTYPE_V4DF_V4DF_PTEST: - type = int_ftype_v4df_v4df; - break; - case INT_FTYPE_V4SF_V4SF_PTEST: - type = int_ftype_v4sf_v4sf; - break; - case INT_FTYPE_V2DI_V2DI_PTEST: - type = int_ftype_v2di_v2di; - break; - case INT_FTYPE_V2DF_V2DF_PTEST: - type = int_ftype_v2df_v2df; - break; - case INT64_FTYPE_V4SF: - type = int64_ftype_v4sf; - break; - case INT64_FTYPE_V2DF: - type = int64_ftype_v2df; - break; - case INT_FTYPE_V16QI: - type = int_ftype_v16qi; - break; - case INT_FTYPE_V8QI: - type = int_ftype_v8qi; - break; - case INT_FTYPE_V8SF: - type = int_ftype_v8sf; - break; - case INT_FTYPE_V4DF: - type = int_ftype_v4df; - break; - case INT_FTYPE_V4SF: - type = int_ftype_v4sf; - break; - case INT_FTYPE_V2DF: - type = int_ftype_v2df; - break; - case V16QI_FTYPE_V16QI: - type = v16qi_ftype_v16qi; - break; - case V8SI_FTYPE_V8SF: - type = v8si_ftype_v8sf; - break; - case V8SI_FTYPE_V4SI: - type = v8si_ftype_v4si; - break; - case V8HI_FTYPE_V8HI: - type = v8hi_ftype_v8hi; - break; - case V8HI_FTYPE_V16QI: - type = v8hi_ftype_v16qi; - break; - case V8QI_FTYPE_V8QI: - type = v8qi_ftype_v8qi; - break; - case V8SF_FTYPE_V8SF: - type = v8sf_ftype_v8sf; - break; - case V8SF_FTYPE_V8SI: - type = v8sf_ftype_v8si; - break; - case V8SF_FTYPE_V4SF: - type = v8sf_ftype_v4sf; - break; - case V4SI_FTYPE_V4DF: - type = v4si_ftype_v4df; - break; - case V4SI_FTYPE_V4SI: - type = v4si_ftype_v4si; - break; - case V4SI_FTYPE_V16QI: - type = v4si_ftype_v16qi; - break; - case V4SI_FTYPE_V8SI: - type = v4si_ftype_v8si; - break; - case V4SI_FTYPE_V8HI: - type = v4si_ftype_v8hi; - break; - case V4SI_FTYPE_V4SF: - type = v4si_ftype_v4sf; - break; - case V4SI_FTYPE_V2DF: - type = v4si_ftype_v2df; - break; - case V4HI_FTYPE_V4HI: - type = v4hi_ftype_v4hi; - break; - case V4DF_FTYPE_V4DF: - type = v4df_ftype_v4df; - break; - case V4DF_FTYPE_V4SI: - type = v4df_ftype_v4si; - break; - case V4DF_FTYPE_V4SF: - type = v4df_ftype_v4sf; - break; - case V4DF_FTYPE_V2DF: - type = v4df_ftype_v2df; - break; - case V4SF_FTYPE_V4SF: - case V4SF_FTYPE_V4SF_VEC_MERGE: - type = v4sf_ftype_v4sf; - break; - case V4SF_FTYPE_V8SF: - type = v4sf_ftype_v8sf; - break; - case V4SF_FTYPE_V4SI: - type = v4sf_ftype_v4si; - break; - case V4SF_FTYPE_V4DF: - type = v4sf_ftype_v4df; - break; - case V4SF_FTYPE_V2DF: - type = v4sf_ftype_v2df; - break; - case V2DI_FTYPE_V2DI: - type = v2di_ftype_v2di; - break; - case V2DI_FTYPE_V16QI: - type = v2di_ftype_v16qi; - break; - case V2DI_FTYPE_V8HI: - type = v2di_ftype_v8hi; - break; - case V2DI_FTYPE_V4SI: - type = v2di_ftype_v4si; - break; - case V2SI_FTYPE_V2SI: - type = v2si_ftype_v2si; - break; - case V2SI_FTYPE_V4SF: - type = v2si_ftype_v4sf; - break; - case V2SI_FTYPE_V2DF: - type = v2si_ftype_v2df; - break; - case V2SI_FTYPE_V2SF: - type = v2si_ftype_v2sf; - break; - case V2DF_FTYPE_V4DF: - type = v2df_ftype_v4df; - break; - case V2DF_FTYPE_V4SF: - type = v2df_ftype_v4sf; - break; - case V2DF_FTYPE_V2DF: - case V2DF_FTYPE_V2DF_VEC_MERGE: - type = v2df_ftype_v2df; - break; - case V2DF_FTYPE_V2SI: - type = v2df_ftype_v2si; - break; - case V2DF_FTYPE_V4SI: - type = v2df_ftype_v4si; - break; - case V2SF_FTYPE_V2SF: - type = v2sf_ftype_v2sf; - break; - case V2SF_FTYPE_V2SI: - type = v2sf_ftype_v2si; - break; - case V16QI_FTYPE_V16QI_V16QI: - type = v16qi_ftype_v16qi_v16qi; - break; - case V16QI_FTYPE_V8HI_V8HI: - type = v16qi_ftype_v8hi_v8hi; - break; - case V8QI_FTYPE_V8QI_V8QI: - type = v8qi_ftype_v8qi_v8qi; - break; - case V8QI_FTYPE_V4HI_V4HI: - type = v8qi_ftype_v4hi_v4hi; - break; - case V8HI_FTYPE_V8HI_V8HI: - case V8HI_FTYPE_V8HI_V8HI_COUNT: - type = v8hi_ftype_v8hi_v8hi; - break; - case V8HI_FTYPE_V16QI_V16QI: - type = v8hi_ftype_v16qi_v16qi; - break; - case V8HI_FTYPE_V4SI_V4SI: - type = v8hi_ftype_v4si_v4si; - break; - case V8HI_FTYPE_V8HI_SI_COUNT: - type = v8hi_ftype_v8hi_int; - break; - case V8SF_FTYPE_V8SF_V8SF: - type = v8sf_ftype_v8sf_v8sf; - break; - case V8SF_FTYPE_V8SF_V8SI: - type = v8sf_ftype_v8sf_v8si; - break; - case V4SI_FTYPE_V4SI_V4SI: - case V4SI_FTYPE_V4SI_V4SI_COUNT: - type = v4si_ftype_v4si_v4si; - break; - case V4SI_FTYPE_V8HI_V8HI: - type = v4si_ftype_v8hi_v8hi; - break; - case V4SI_FTYPE_V4SF_V4SF: - type = v4si_ftype_v4sf_v4sf; - break; - case V4SI_FTYPE_V2DF_V2DF: - type = v4si_ftype_v2df_v2df; - break; - case V4SI_FTYPE_V4SI_SI_COUNT: - type = v4si_ftype_v4si_int; - break; - case V4HI_FTYPE_V4HI_V4HI: - case V4HI_FTYPE_V4HI_V4HI_COUNT: - type = v4hi_ftype_v4hi_v4hi; - break; - case V4HI_FTYPE_V8QI_V8QI: - type = v4hi_ftype_v8qi_v8qi; - break; - case V4HI_FTYPE_V2SI_V2SI: - type = v4hi_ftype_v2si_v2si; - break; - case V4HI_FTYPE_V4HI_SI_COUNT: - type = v4hi_ftype_v4hi_int; - break; - case V4DF_FTYPE_V4DF_V4DF: - type = v4df_ftype_v4df_v4df; - break; - case V4DF_FTYPE_V4DF_V4DI: - type = v4df_ftype_v4df_v4di; - break; - case V4SF_FTYPE_V4SF_V4SF: - case V4SF_FTYPE_V4SF_V4SF_SWAP: - type = v4sf_ftype_v4sf_v4sf; - break; - case V4SF_FTYPE_V4SF_V4SI: - type = v4sf_ftype_v4sf_v4si; - break; - case V4SF_FTYPE_V4SF_V2SI: - type = v4sf_ftype_v4sf_v2si; - break; - case V4SF_FTYPE_V4SF_V2DF: - type = v4sf_ftype_v4sf_v2df; - break; - case V4SF_FTYPE_V4SF_DI: - type = v4sf_ftype_v4sf_int64; - break; - case V4SF_FTYPE_V4SF_SI: - type = v4sf_ftype_v4sf_int; - break; - case V2DI_FTYPE_V2DI_V2DI: - case V2DI_FTYPE_V2DI_V2DI_COUNT: - type = v2di_ftype_v2di_v2di; - break; - case V2DI_FTYPE_V16QI_V16QI: - type = v2di_ftype_v16qi_v16qi; - break; - case V2DI_FTYPE_V4SI_V4SI: - type = v2di_ftype_v4si_v4si; - break; - case V2DI_FTYPE_V2DI_V16QI: - type = v2di_ftype_v2di_v16qi; - break; - case V2DI_FTYPE_V2DF_V2DF: - type = v2di_ftype_v2df_v2df; - break; - case V2DI_FTYPE_V2DI_SI_COUNT: - type = v2di_ftype_v2di_int; - break; - case V2SI_FTYPE_V2SI_V2SI: - case V2SI_FTYPE_V2SI_V2SI_COUNT: - type = v2si_ftype_v2si_v2si; - break; - case V2SI_FTYPE_V4HI_V4HI: - type = v2si_ftype_v4hi_v4hi; - break; - case V2SI_FTYPE_V2SF_V2SF: - type = v2si_ftype_v2sf_v2sf; - break; - case V2SI_FTYPE_V2SI_SI_COUNT: - type = v2si_ftype_v2si_int; - break; - case V2DF_FTYPE_V2DF_V2DF: - case V2DF_FTYPE_V2DF_V2DF_SWAP: - type = v2df_ftype_v2df_v2df; - break; - case V2DF_FTYPE_V2DF_V4SF: - type = v2df_ftype_v2df_v4sf; - break; - case V2DF_FTYPE_V2DF_V2DI: - type = v2df_ftype_v2df_v2di; - break; - case V2DF_FTYPE_V2DF_DI: - type = v2df_ftype_v2df_int64; - break; - case V2DF_FTYPE_V2DF_SI: - type = v2df_ftype_v2df_int; - break; - case V2SF_FTYPE_V2SF_V2SF: - type = v2sf_ftype_v2sf_v2sf; - break; - case V1DI_FTYPE_V1DI_V1DI: - case V1DI_FTYPE_V1DI_V1DI_COUNT: - type = v1di_ftype_v1di_v1di; - break; - case V1DI_FTYPE_V8QI_V8QI: - type = v1di_ftype_v8qi_v8qi; - break; - case V1DI_FTYPE_V2SI_V2SI: - type = v1di_ftype_v2si_v2si; - break; - case V1DI_FTYPE_V1DI_SI_COUNT: - type = v1di_ftype_v1di_int; - break; - case UINT64_FTYPE_UINT64_UINT64: - type = uint64_ftype_uint64_uint64; - break; - case UINT_FTYPE_UINT_UINT: - type = unsigned_ftype_unsigned_unsigned; - break; - case UINT_FTYPE_UINT_USHORT: - type = unsigned_ftype_unsigned_ushort; - break; - case UINT_FTYPE_UINT_UCHAR: - type = unsigned_ftype_unsigned_uchar; - break; - case V8HI_FTYPE_V8HI_INT: - type = v8hi_ftype_v8hi_int; - break; - case V8SF_FTYPE_V8SF_INT: - type = v8sf_ftype_v8sf_int; - break; - case V4SI_FTYPE_V4SI_INT: - type = v4si_ftype_v4si_int; - break; - case V4SI_FTYPE_V8SI_INT: - type = v4si_ftype_v8si_int; - break; - case V4HI_FTYPE_V4HI_INT: - type = v4hi_ftype_v4hi_int; - break; - case V4DF_FTYPE_V4DF_INT: - type = v4df_ftype_v4df_int; - break; - case V4SF_FTYPE_V4SF_INT: - type = v4sf_ftype_v4sf_int; - break; - case V4SF_FTYPE_V8SF_INT: - type = v4sf_ftype_v8sf_int; - break; - case V2DI_FTYPE_V2DI_INT: - case V2DI2TI_FTYPE_V2DI_INT: - type = v2di_ftype_v2di_int; - break; - case V2DF_FTYPE_V2DF_INT: - type = v2df_ftype_v2df_int; - break; - case V2DF_FTYPE_V4DF_INT: - type = v2df_ftype_v4df_int; - break; - case V16QI_FTYPE_V16QI_V16QI_V16QI: - type = v16qi_ftype_v16qi_v16qi_v16qi; - break; - case V8SF_FTYPE_V8SF_V8SF_V8SF: - type = v8sf_ftype_v8sf_v8sf_v8sf; - break; - case V4DF_FTYPE_V4DF_V4DF_V4DF: - type = v4df_ftype_v4df_v4df_v4df; - break; - case V4SF_FTYPE_V4SF_V4SF_V4SF: - type = v4sf_ftype_v4sf_v4sf_v4sf; - break; - case V2DF_FTYPE_V2DF_V2DF_V2DF: - type = v2df_ftype_v2df_v2df_v2df; - break; - case V16QI_FTYPE_V16QI_V16QI_INT: - type = v16qi_ftype_v16qi_v16qi_int; - break; - case V8SI_FTYPE_V8SI_V8SI_INT: - type = v8si_ftype_v8si_v8si_int; - break; - case V8SI_FTYPE_V8SI_V4SI_INT: - type = v8si_ftype_v8si_v4si_int; - break; - case V8HI_FTYPE_V8HI_V8HI_INT: - type = v8hi_ftype_v8hi_v8hi_int; - break; - case V8SF_FTYPE_V8SF_V8SF_INT: - type = v8sf_ftype_v8sf_v8sf_int; - break; - case V8SF_FTYPE_V8SF_V4SF_INT: - type = v8sf_ftype_v8sf_v4sf_int; - break; - case V4SI_FTYPE_V4SI_V4SI_INT: - type = v4si_ftype_v4si_v4si_int; - break; - case V4DF_FTYPE_V4DF_V4DF_INT: - type = v4df_ftype_v4df_v4df_int; - break; - case V4DF_FTYPE_V4DF_V2DF_INT: - type = v4df_ftype_v4df_v2df_int; - break; - case V4SF_FTYPE_V4SF_V4SF_INT: - type = v4sf_ftype_v4sf_v4sf_int; - break; - case V2DI_FTYPE_V2DI_V2DI_INT: - case V2DI2TI_FTYPE_V2DI_V2DI_INT: - type = v2di_ftype_v2di_v2di_int; - break; - case V2DF_FTYPE_V2DF_V2DF_INT: - type = v2df_ftype_v2df_v2df_int; - break; - case V2DI_FTYPE_V2DI_UINT_UINT: - type = v2di_ftype_v2di_unsigned_unsigned; - break; - case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: - type = v2di_ftype_v2di_v2di_unsigned_unsigned; - break; - case V1DI2DI_FTYPE_V1DI_V1DI_INT: - type = v1di_ftype_v1di_v1di_int; - break; - default: - gcc_unreachable (); - } - - def_builtin_const (d->mask, d->name, type, d->code); - } - - /* pcmpestr[im] insns. */ - for (i = 0, d = bdesc_pcmpestr; - i < ARRAY_SIZE (bdesc_pcmpestr); - i++, d++) - { - if (d->code == IX86_BUILTIN_PCMPESTRM128) - ftype = v16qi_ftype_v16qi_int_v16qi_int_int; - else - ftype = int_ftype_v16qi_int_v16qi_int_int; - def_builtin_const (d->mask, d->name, ftype, d->code); - } - - /* pcmpistr[im] insns. */ - for (i = 0, d = bdesc_pcmpistr; - i < ARRAY_SIZE (bdesc_pcmpistr); - i++, d++) - { - if (d->code == IX86_BUILTIN_PCMPISTRM128) - ftype = v16qi_ftype_v16qi_v16qi_int; - else - ftype = int_ftype_v16qi_v16qi_int; - def_builtin_const (d->mask, d->name, ftype, d->code); - } - - /* comi/ucomi insns. */ - for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) - if (d->mask == OPTION_MASK_ISA_SSE2) - def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code); - else - def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); - - /* SSE */ - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); - - /* SSE or 3DNow!A */ - def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - - /* SSE2 */ - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); - - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); - x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); - - /* SSE3. */ - def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR); - def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT); - - /* AES */ - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); - - /* PCLMUL */ - def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); - - /* AVX */ - def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void, - TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER); - - /* Access to the vec_init patterns. */ - ftype = build_function_type_list (V2SI_type_node, integer_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI); - - ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, - short_integer_type_node, - short_integer_type_node, - short_integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI); - - ftype = build_function_type_list (V8QI_type_node, char_type_node, - char_type_node, char_type_node, - char_type_node, char_type_node, - char_type_node, char_type_node, - char_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI); - - /* Access to the vec_extract patterns. */ - ftype = build_function_type_list (double_type_node, V2DF_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF); - - ftype = build_function_type_list (long_long_integer_type_node, - V2DI_type_node, integer_type_node, - NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI); - - ftype = build_function_type_list (float_type_node, V4SF_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF); - - ftype = build_function_type_list (intSI_type_node, V4SI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI); - - ftype = build_function_type_list (intHI_type_node, V8HI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI); - - ftype = build_function_type_list (intHI_type_node, V4HI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI); - - ftype = build_function_type_list (intSI_type_node, V2SI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI); - - ftype = build_function_type_list (intQI_type_node, V16QI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); - - /* Access to the vec_set patterns. */ - ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, - intDI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI); - - ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, - float_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF); - - ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, - intSI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI); - - ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, - intHI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI); - - ftype = build_function_type_list (V4HI_type_node, V4HI_type_node, - intHI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI); - - ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, - intQI_type_node, - integer_type_node, NULL_TREE); - def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI); - - /* Add SSE5 multi-arg argument instructions */ - for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) - { - tree mtype = NULL_TREE; - - if (d->name == 0) - continue; - - switch ((enum multi_arg_type)d->flag) - { - case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break; - case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; - case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; - case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; - case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; - case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; - case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; - case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; - case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break; - case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break; - case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; - case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; - case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; - case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; - case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; - case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; - case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; - case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; - case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; - case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; - case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break; - case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break; - case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; - case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break; - case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; - case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; - case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; - case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; - case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; - case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; - case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; - case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; - case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; - case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; - case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; - case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; - case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; - case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; - case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; - case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; - case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; - case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; - case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; - case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; - case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break; - case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break; - case MULTI_ARG_UNKNOWN: - default: - gcc_unreachable (); - } - - if (mtype) - def_builtin_const (d->mask, d->name, mtype, d->code); - } -} - -/* Internal method for ix86_init_builtins. */ - -static void -ix86_init_builtins_va_builtins_abi (void) -{ - tree ms_va_ref, sysv_va_ref; - tree fnvoid_va_end_ms, fnvoid_va_end_sysv; - tree fnvoid_va_start_ms, fnvoid_va_start_sysv; - tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; - tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; - - if (!TARGET_64BIT) - return; - fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); - fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); - ms_va_ref = build_reference_type (ms_va_list_type_node); - sysv_va_ref = - build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); - - fnvoid_va_end_ms = - build_function_type_list (void_type_node, ms_va_ref, NULL_TREE); - fnvoid_va_start_ms = - build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); - fnvoid_va_end_sysv = - build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); - fnvoid_va_start_sysv = - build_varargs_function_type_list (void_type_node, sysv_va_ref, - NULL_TREE); - fnvoid_va_copy_ms = - build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node, - NULL_TREE); - fnvoid_va_copy_sysv = - build_function_type_list (void_type_node, sysv_va_ref, - sysv_va_ref, NULL_TREE); - - add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, - BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); - add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, - BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); - add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, - BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); - add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, - BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); - add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, - BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv); - add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, - BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); -} - -static void -ix86_init_builtins (void) -{ - tree float128_type_node = make_node (REAL_TYPE); - tree ftype, decl; - - /* The __float80 type. */ - if (TYPE_MODE (long_double_type_node) == XFmode) - (*lang_hooks.types.register_builtin_type) (long_double_type_node, - "__float80"); - else - { - /* The __float80 type. */ - tree float80_type_node = make_node (REAL_TYPE); - - TYPE_PRECISION (float80_type_node) = 80; - layout_type (float80_type_node); - (*lang_hooks.types.register_builtin_type) (float80_type_node, - "__float80"); - } - - /* The __float128 type. */ - TYPE_PRECISION (float128_type_node) = 128; - layout_type (float128_type_node); - (*lang_hooks.types.register_builtin_type) (float128_type_node, - "__float128"); - - /* TFmode support builtins. */ - ftype = build_function_type (float128_type_node, void_list_node); - decl = add_builtin_function ("__builtin_infq", ftype, - IX86_BUILTIN_INFQ, BUILT_IN_MD, - NULL, NULL_TREE); - ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl; - - /* We will expand them to normal call if SSE2 isn't available since - they are used by libgcc. */ - ftype = build_function_type_list (float128_type_node, - float128_type_node, - NULL_TREE); - decl = add_builtin_function ("__builtin_fabsq", ftype, - IX86_BUILTIN_FABSQ, BUILT_IN_MD, - "__fabstf2", NULL_TREE); - ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; - TREE_READONLY (decl) = 1; - - ftype = build_function_type_list (float128_type_node, - float128_type_node, - float128_type_node, - NULL_TREE); - decl = add_builtin_function ("__builtin_copysignq", ftype, - IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, - "__copysigntf3", NULL_TREE); - ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; - TREE_READONLY (decl) = 1; - - ix86_init_mmx_sse_builtins (); - if (TARGET_64BIT) - ix86_init_builtins_va_builtins_abi (); -} - -/* Errors in the source file can cause expand_expr to return const0_rtx - where we expect a vector. To avoid crashing, use one of the vector - clear instructions. */ -static rtx -safe_vector_operand (rtx x, enum machine_mode mode) -{ - if (x == const0_rtx) - x = CONST0_RTX (mode); - return x; -} - -/* Subroutine of ix86_expand_builtin to take care of binop insns. */ - -static rtx -ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - enum machine_mode mode1 = insn_data[icode].operand[2].mode; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - if (optimize || !target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - if (GET_MODE (op1) == SImode && mode1 == TImode) - { - rtx x = gen_reg_rtx (V4SImode); - emit_insn (gen_sse2_loadd (x, op1)); - op1 = gen_lowpart (TImode, x); - } - - if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - - emit_insn (pat); - - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ - -static rtx -ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, - enum multi_arg_type m_type, - enum insn_code sub_code) -{ - rtx pat; - int i; - int nargs; - bool comparison_p = false; - bool tf_p = false; - bool last_arg_constant = false; - int num_memory = 0; - struct { - rtx op; - enum machine_mode mode; - } args[4]; - - enum machine_mode tmode = insn_data[icode].operand[0].mode; - - switch (m_type) - { - case MULTI_ARG_3_SF: - case MULTI_ARG_3_DF: - case MULTI_ARG_3_DI: - case MULTI_ARG_3_SI: - case MULTI_ARG_3_SI_DI: - case MULTI_ARG_3_HI: - case MULTI_ARG_3_HI_SI: - case MULTI_ARG_3_QI: - case MULTI_ARG_3_PERMPS: - case MULTI_ARG_3_PERMPD: - nargs = 3; - break; - - case MULTI_ARG_2_SF: - case MULTI_ARG_2_DF: - case MULTI_ARG_2_DI: - case MULTI_ARG_2_SI: - case MULTI_ARG_2_HI: - case MULTI_ARG_2_QI: - nargs = 2; - break; - - case MULTI_ARG_2_DI_IMM: - case MULTI_ARG_2_SI_IMM: - case MULTI_ARG_2_HI_IMM: - case MULTI_ARG_2_QI_IMM: - nargs = 2; - last_arg_constant = true; - break; - - case MULTI_ARG_1_SF: - case MULTI_ARG_1_DF: - case MULTI_ARG_1_DI: - case MULTI_ARG_1_SI: - case MULTI_ARG_1_HI: - case MULTI_ARG_1_QI: - case MULTI_ARG_1_SI_DI: - case MULTI_ARG_1_HI_DI: - case MULTI_ARG_1_HI_SI: - case MULTI_ARG_1_QI_DI: - case MULTI_ARG_1_QI_SI: - case MULTI_ARG_1_QI_HI: - case MULTI_ARG_1_PH2PS: - case MULTI_ARG_1_PS2PH: - nargs = 1; - break; - - case MULTI_ARG_2_SF_CMP: - case MULTI_ARG_2_DF_CMP: - case MULTI_ARG_2_DI_CMP: - case MULTI_ARG_2_SI_CMP: - case MULTI_ARG_2_HI_CMP: - case MULTI_ARG_2_QI_CMP: - nargs = 2; - comparison_p = true; - break; - - case MULTI_ARG_2_SF_TF: - case MULTI_ARG_2_DF_TF: - case MULTI_ARG_2_DI_TF: - case MULTI_ARG_2_SI_TF: - case MULTI_ARG_2_HI_TF: - case MULTI_ARG_2_QI_TF: - nargs = 2; - tf_p = true; - break; - - case MULTI_ARG_UNKNOWN: - default: - gcc_unreachable (); - } - - if (optimize || !target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - gcc_assert (nargs <= 4); - - for (i = 0; i < nargs; i++) - { - tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); - int adjust = (comparison_p) ? 1 : 0; - enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; - - if (last_arg_constant && i == nargs-1) - { - if (GET_CODE (op) != CONST_INT) - { - error ("last argument must be an immediate"); - return gen_reg_rtx (tmode); - } - } - else - { - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - /* If we aren't optimizing, only allow one memory operand to be - generated. */ - if (memory_operand (op, mode)) - num_memory++; - - gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); - - if (optimize - || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode) - || num_memory > 1) - op = force_reg (mode, op); - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 1: - pat = GEN_FCN (icode) (target, args[0].op); - break; - - case 2: - if (tf_p) - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, - GEN_INT ((int)sub_code)); - else if (! comparison_p) - pat = GEN_FCN (icode) (target, args[0].op, args[1].op); - else - { - rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), - args[0].op, - args[1].op); - - pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); - } - break; - - case 3: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); - break; - - default: - gcc_unreachable (); - } - - if (! pat) - return 0; - - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_args_builtin to take care of scalar unop - insns with vec_merge. */ - -static rtx -ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - rtx op1, op0 = expand_normal (arg0); - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - - if (optimize || !target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - - if ((optimize && !register_operand (op0, mode0)) - || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - - op1 = op0; - if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) - op1 = copy_to_mode_reg (mode0, op1); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ - -static rtx -ix86_expand_sse_compare (const struct builtin_description *d, - tree exp, rtx target, bool swap) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2; - enum machine_mode tmode = insn_data[d->icode].operand[0].mode; - enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; - enum rtx_code comparison = d->comparison; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - /* Swap operands if we have a comparison that isn't available in - hardware. */ - if (swap) - { - rtx tmp = gen_reg_rtx (mode1); - emit_move_insn (tmp, op1); - op1 = op0; - op0 = tmp; - } - - if (optimize || !target - || GET_MODE (target) != tmode - || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - if ((optimize && !register_operand (op0, mode0)) - || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); - pat = GEN_FCN (d->icode) (target, op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of comi insns. */ - -static rtx -ix86_expand_sse_comi (const struct builtin_description *d, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; - enum rtx_code comparison = d->comparison; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - /* Swap operands if we have a comparison that isn't available in - hardware. */ - if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) - { - rtx tmp = op1; - op1 = op0; - op0 = tmp; - } - - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - if ((optimize && !register_operand (op0, mode0)) - || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (d->icode) (op0, op1); - if (! pat) - return 0; - emit_insn (pat); - emit_insn (gen_rtx_SET (VOIDmode, - gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (comparison, QImode, - SET_DEST (pat), - const0_rtx))); - - return SUBREG_REG (target); -} - -/* Subroutine of ix86_expand_builtin to take care of ptest insns. */ - -static rtx -ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; - enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; - enum rtx_code comparison = d->comparison; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - if ((optimize && !register_operand (op0, mode0)) - || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (d->icode) (op0, op1); - if (! pat) - return 0; - emit_insn (pat); - emit_insn (gen_rtx_SET (VOIDmode, - gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (comparison, QImode, - SET_DEST (pat), - const0_rtx))); - - return SUBREG_REG (target); -} - -/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ - -static rtx -ix86_expand_sse_pcmpestr (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - tree arg2 = CALL_EXPR_ARG (exp, 2); - tree arg3 = CALL_EXPR_ARG (exp, 3); - tree arg4 = CALL_EXPR_ARG (exp, 4); - rtx scratch0, scratch1; - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2 = expand_normal (arg2); - rtx op3 = expand_normal (arg3); - rtx op4 = expand_normal (arg4); - enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; - - tmode0 = insn_data[d->icode].operand[0].mode; - tmode1 = insn_data[d->icode].operand[1].mode; - modev2 = insn_data[d->icode].operand[2].mode; - modei3 = insn_data[d->icode].operand[3].mode; - modev4 = insn_data[d->icode].operand[4].mode; - modei5 = insn_data[d->icode].operand[5].mode; - modeimm = insn_data[d->icode].operand[6].mode; - - if (VECTOR_MODE_P (modev2)) - op0 = safe_vector_operand (op0, modev2); - if (VECTOR_MODE_P (modev4)) - op2 = safe_vector_operand (op2, modev4); - - if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) - op0 = copy_to_mode_reg (modev2, op0); - if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3)) - op1 = copy_to_mode_reg (modei3, op1); - if ((optimize && !register_operand (op2, modev4)) - || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4)) - op2 = copy_to_mode_reg (modev4, op2); - if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5)) - op3 = copy_to_mode_reg (modei5, op3); - - if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm)) - { - error ("the fifth argument must be a 8-bit immediate"); - return const0_rtx; - } - - if (d->code == IX86_BUILTIN_PCMPESTRI128) - { - if (optimize || !target - || GET_MODE (target) != tmode0 - || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0)) - target = gen_reg_rtx (tmode0); - - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); - } - else if (d->code == IX86_BUILTIN_PCMPESTRM128) - { - if (optimize || !target - || GET_MODE (target) != tmode1 - || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) - target = gen_reg_rtx (tmode1); - - scratch0 = gen_reg_rtx (tmode0); - - pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); - } - else - { - gcc_assert (d->flag); - - scratch0 = gen_reg_rtx (tmode0); - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); - } - - if (! pat) - return 0; - - emit_insn (pat); - - if (d->flag) - { - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - emit_insn - (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (EQ, QImode, - gen_rtx_REG ((enum machine_mode) d->flag, - FLAGS_REG), - const0_rtx))); - return SUBREG_REG (target); - } - else - return target; -} - - -/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ - -static rtx -ix86_expand_sse_pcmpistr (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - tree arg2 = CALL_EXPR_ARG (exp, 2); - rtx scratch0, scratch1; - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2 = expand_normal (arg2); - enum machine_mode tmode0, tmode1, modev2, modev3, modeimm; - - tmode0 = insn_data[d->icode].operand[0].mode; - tmode1 = insn_data[d->icode].operand[1].mode; - modev2 = insn_data[d->icode].operand[2].mode; - modev3 = insn_data[d->icode].operand[3].mode; - modeimm = insn_data[d->icode].operand[4].mode; - - if (VECTOR_MODE_P (modev2)) - op0 = safe_vector_operand (op0, modev2); - if (VECTOR_MODE_P (modev3)) - op1 = safe_vector_operand (op1, modev3); - - if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) - op0 = copy_to_mode_reg (modev2, op0); - if ((optimize && !register_operand (op1, modev3)) - || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3)) - op1 = copy_to_mode_reg (modev3, op1); - - if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm)) - { - error ("the third argument must be a 8-bit immediate"); - return const0_rtx; - } - - if (d->code == IX86_BUILTIN_PCMPISTRI128) - { - if (optimize || !target - || GET_MODE (target) != tmode0 - || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0)) - target = gen_reg_rtx (tmode0); - - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); - } - else if (d->code == IX86_BUILTIN_PCMPISTRM128) - { - if (optimize || !target - || GET_MODE (target) != tmode1 - || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) - target = gen_reg_rtx (tmode1); - - scratch0 = gen_reg_rtx (tmode0); - - pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); - } - else - { - gcc_assert (d->flag); - - scratch0 = gen_reg_rtx (tmode0); - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); - } - - if (! pat) - return 0; - - emit_insn (pat); - - if (d->flag) - { - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - emit_insn - (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (EQ, QImode, - gen_rtx_REG ((enum machine_mode) d->flag, - FLAGS_REG), - const0_rtx))); - return SUBREG_REG (target); - } - else - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of insns with - variable number of operands. */ - -static rtx -ix86_expand_args_builtin (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat, real_target; - unsigned int i, nargs; - unsigned int nargs_constant = 0; - int num_memory = 0; - struct - { - rtx op; - enum machine_mode mode; - } args[4]; - bool last_arg_count = false; - enum insn_code icode = d->icode; - const struct insn_data *insn_p = &insn_data[icode]; - enum machine_mode tmode = insn_p->operand[0].mode; - enum machine_mode rmode = VOIDmode; - bool swap = false; - enum rtx_code comparison = d->comparison; - - switch ((enum ix86_builtin_type) d->flag) - { - case INT_FTYPE_V8SF_V8SF_PTEST: - case INT_FTYPE_V4DI_V4DI_PTEST: - case INT_FTYPE_V4DF_V4DF_PTEST: - case INT_FTYPE_V4SF_V4SF_PTEST: - case INT_FTYPE_V2DI_V2DI_PTEST: - case INT_FTYPE_V2DF_V2DF_PTEST: - return ix86_expand_sse_ptest (d, exp, target); - case FLOAT128_FTYPE_FLOAT128: - case FLOAT_FTYPE_FLOAT: - case INT64_FTYPE_V4SF: - case INT64_FTYPE_V2DF: - case INT_FTYPE_V16QI: - case INT_FTYPE_V8QI: - case INT_FTYPE_V8SF: - case INT_FTYPE_V4DF: - case INT_FTYPE_V4SF: - case INT_FTYPE_V2DF: - case V16QI_FTYPE_V16QI: - case V8SI_FTYPE_V8SF: - case V8SI_FTYPE_V4SI: - case V8HI_FTYPE_V8HI: - case V8HI_FTYPE_V16QI: - case V8QI_FTYPE_V8QI: - case V8SF_FTYPE_V8SF: - case V8SF_FTYPE_V8SI: - case V8SF_FTYPE_V4SF: - case V4SI_FTYPE_V4SI: - case V4SI_FTYPE_V16QI: - case V4SI_FTYPE_V4SF: - case V4SI_FTYPE_V8SI: - case V4SI_FTYPE_V8HI: - case V4SI_FTYPE_V4DF: - case V4SI_FTYPE_V2DF: - case V4HI_FTYPE_V4HI: - case V4DF_FTYPE_V4DF: - case V4DF_FTYPE_V4SI: - case V4DF_FTYPE_V4SF: - case V4DF_FTYPE_V2DF: - case V4SF_FTYPE_V4SF: - case V4SF_FTYPE_V4SI: - case V4SF_FTYPE_V8SF: - case V4SF_FTYPE_V4DF: - case V4SF_FTYPE_V2DF: - case V2DI_FTYPE_V2DI: - case V2DI_FTYPE_V16QI: - case V2DI_FTYPE_V8HI: - case V2DI_FTYPE_V4SI: - case V2DF_FTYPE_V2DF: - case V2DF_FTYPE_V4SI: - case V2DF_FTYPE_V4DF: - case V2DF_FTYPE_V4SF: - case V2DF_FTYPE_V2SI: - case V2SI_FTYPE_V2SI: - case V2SI_FTYPE_V4SF: - case V2SI_FTYPE_V2SF: - case V2SI_FTYPE_V2DF: - case V2SF_FTYPE_V2SF: - case V2SF_FTYPE_V2SI: - nargs = 1; - break; - case V4SF_FTYPE_V4SF_VEC_MERGE: - case V2DF_FTYPE_V2DF_VEC_MERGE: - return ix86_expand_unop_vec_merge_builtin (icode, exp, target); - case FLOAT128_FTYPE_FLOAT128_FLOAT128: - case V16QI_FTYPE_V16QI_V16QI: - case V16QI_FTYPE_V8HI_V8HI: - case V8QI_FTYPE_V8QI_V8QI: - case V8QI_FTYPE_V4HI_V4HI: - case V8HI_FTYPE_V8HI_V8HI: - case V8HI_FTYPE_V16QI_V16QI: - case V8HI_FTYPE_V4SI_V4SI: - case V8SF_FTYPE_V8SF_V8SF: - case V8SF_FTYPE_V8SF_V8SI: - case V4SI_FTYPE_V4SI_V4SI: - case V4SI_FTYPE_V8HI_V8HI: - case V4SI_FTYPE_V4SF_V4SF: - case V4SI_FTYPE_V2DF_V2DF: - case V4HI_FTYPE_V4HI_V4HI: - case V4HI_FTYPE_V8QI_V8QI: - case V4HI_FTYPE_V2SI_V2SI: - case V4DF_FTYPE_V4DF_V4DF: - case V4DF_FTYPE_V4DF_V4DI: - case V4SF_FTYPE_V4SF_V4SF: - case V4SF_FTYPE_V4SF_V4SI: - case V4SF_FTYPE_V4SF_V2SI: - case V4SF_FTYPE_V4SF_V2DF: - case V4SF_FTYPE_V4SF_DI: - case V4SF_FTYPE_V4SF_SI: - case V2DI_FTYPE_V2DI_V2DI: - case V2DI_FTYPE_V16QI_V16QI: - case V2DI_FTYPE_V4SI_V4SI: - case V2DI_FTYPE_V2DI_V16QI: - case V2DI_FTYPE_V2DF_V2DF: - case V2SI_FTYPE_V2SI_V2SI: - case V2SI_FTYPE_V4HI_V4HI: - case V2SI_FTYPE_V2SF_V2SF: - case V2DF_FTYPE_V2DF_V2DF: - case V2DF_FTYPE_V2DF_V4SF: - case V2DF_FTYPE_V2DF_V2DI: - case V2DF_FTYPE_V2DF_DI: - case V2DF_FTYPE_V2DF_SI: - case V2SF_FTYPE_V2SF_V2SF: - case V1DI_FTYPE_V1DI_V1DI: - case V1DI_FTYPE_V8QI_V8QI: - case V1DI_FTYPE_V2SI_V2SI: - if (comparison == UNKNOWN) - return ix86_expand_binop_builtin (icode, exp, target); - nargs = 2; - break; - case V4SF_FTYPE_V4SF_V4SF_SWAP: - case V2DF_FTYPE_V2DF_V2DF_SWAP: - gcc_assert (comparison != UNKNOWN); - nargs = 2; - swap = true; - break; - case V8HI_FTYPE_V8HI_V8HI_COUNT: - case V8HI_FTYPE_V8HI_SI_COUNT: - case V4SI_FTYPE_V4SI_V4SI_COUNT: - case V4SI_FTYPE_V4SI_SI_COUNT: - case V4HI_FTYPE_V4HI_V4HI_COUNT: - case V4HI_FTYPE_V4HI_SI_COUNT: - case V2DI_FTYPE_V2DI_V2DI_COUNT: - case V2DI_FTYPE_V2DI_SI_COUNT: - case V2SI_FTYPE_V2SI_V2SI_COUNT: - case V2SI_FTYPE_V2SI_SI_COUNT: - case V1DI_FTYPE_V1DI_V1DI_COUNT: - case V1DI_FTYPE_V1DI_SI_COUNT: - nargs = 2; - last_arg_count = true; - break; - case UINT64_FTYPE_UINT64_UINT64: - case UINT_FTYPE_UINT_UINT: - case UINT_FTYPE_UINT_USHORT: - case UINT_FTYPE_UINT_UCHAR: - nargs = 2; - break; - case V2DI2TI_FTYPE_V2DI_INT: - nargs = 2; - rmode = V2DImode; - nargs_constant = 1; - break; - case V8HI_FTYPE_V8HI_INT: - case V8SF_FTYPE_V8SF_INT: - case V4SI_FTYPE_V4SI_INT: - case V4SI_FTYPE_V8SI_INT: - case V4HI_FTYPE_V4HI_INT: - case V4DF_FTYPE_V4DF_INT: - case V4SF_FTYPE_V4SF_INT: - case V4SF_FTYPE_V8SF_INT: - case V2DI_FTYPE_V2DI_INT: - case V2DF_FTYPE_V2DF_INT: - case V2DF_FTYPE_V4DF_INT: - nargs = 2; - nargs_constant = 1; - break; - case V16QI_FTYPE_V16QI_V16QI_V16QI: - case V8SF_FTYPE_V8SF_V8SF_V8SF: - case V4DF_FTYPE_V4DF_V4DF_V4DF: - case V4SF_FTYPE_V4SF_V4SF_V4SF: - case V2DF_FTYPE_V2DF_V2DF_V2DF: - nargs = 3; - break; - case V16QI_FTYPE_V16QI_V16QI_INT: - case V8HI_FTYPE_V8HI_V8HI_INT: - case V8SI_FTYPE_V8SI_V8SI_INT: - case V8SI_FTYPE_V8SI_V4SI_INT: - case V8SF_FTYPE_V8SF_V8SF_INT: - case V8SF_FTYPE_V8SF_V4SF_INT: - case V4SI_FTYPE_V4SI_V4SI_INT: - case V4DF_FTYPE_V4DF_V4DF_INT: - case V4DF_FTYPE_V4DF_V2DF_INT: - case V4SF_FTYPE_V4SF_V4SF_INT: - case V2DI_FTYPE_V2DI_V2DI_INT: - case V2DF_FTYPE_V2DF_V2DF_INT: - nargs = 3; - nargs_constant = 1; - break; - case V2DI2TI_FTYPE_V2DI_V2DI_INT: - nargs = 3; - rmode = V2DImode; - nargs_constant = 1; - break; - case V1DI2DI_FTYPE_V1DI_V1DI_INT: - nargs = 3; - rmode = DImode; - nargs_constant = 1; - break; - case V2DI_FTYPE_V2DI_UINT_UINT: - nargs = 3; - nargs_constant = 2; - break; - case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: - nargs = 4; - nargs_constant = 2; - break; - default: - gcc_unreachable (); - } - - gcc_assert (nargs <= ARRAY_SIZE (args)); - - if (comparison != UNKNOWN) - { - gcc_assert (nargs == 2); - return ix86_expand_sse_compare (d, exp, target, swap); - } - - if (rmode == VOIDmode || rmode == tmode) - { - if (optimize - || target == 0 - || GET_MODE (target) != tmode - || ! (*insn_p->operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - real_target = target; - } - else - { - target = gen_reg_rtx (rmode); - real_target = simplify_gen_subreg (tmode, target, rmode, 0); - } - - for (i = 0; i < nargs; i++) - { - tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); - enum machine_mode mode = insn_p->operand[i + 1].mode; - bool match = (*insn_p->operand[i + 1].predicate) (op, mode); - - if (last_arg_count && (i + 1) == nargs) - { - /* SIMD shift insns take either an 8-bit immediate or - register as count. But builtin functions take int as - count. If count doesn't match, we put it in register. */ - if (!match) - { - op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0); - if (!(*insn_p->operand[i + 1].predicate) (op, mode)) - op = copy_to_reg (op); - } - } - else if ((nargs - i) <= nargs_constant) - { - if (!match) - switch (icode) - { - case CODE_FOR_sse4_1_roundpd: - case CODE_FOR_sse4_1_roundps: - case CODE_FOR_sse4_1_roundsd: - case CODE_FOR_sse4_1_roundss: - case CODE_FOR_sse4_1_blendps: - case CODE_FOR_avx_blendpd256: - case CODE_FOR_avx_vpermilv4df: - case CODE_FOR_avx_roundpd256: - case CODE_FOR_avx_roundps256: - error ("the last argument must be a 4-bit immediate"); - return const0_rtx; - - case CODE_FOR_sse4_1_blendpd: - case CODE_FOR_avx_vpermilv2df: - error ("the last argument must be a 2-bit immediate"); - return const0_rtx; - - case CODE_FOR_avx_vextractf128v4df: - case CODE_FOR_avx_vextractf128v8sf: - case CODE_FOR_avx_vextractf128v8si: - case CODE_FOR_avx_vinsertf128v4df: - case CODE_FOR_avx_vinsertf128v8sf: - case CODE_FOR_avx_vinsertf128v8si: - error ("the last argument must be a 1-bit immediate"); - return const0_rtx; - - case CODE_FOR_avx_cmpsdv2df3: - case CODE_FOR_avx_cmpssv4sf3: - case CODE_FOR_avx_cmppdv2df3: - case CODE_FOR_avx_cmppsv4sf3: - case CODE_FOR_avx_cmppdv4df3: - case CODE_FOR_avx_cmppsv8sf3: - error ("the last argument must be a 5-bit immediate"); - return const0_rtx; - - default: - switch (nargs_constant) - { - case 2: - if ((nargs - i) == nargs_constant) - { - error ("the next to last argument must be an 8-bit immediate"); - break; - } - case 1: - error ("the last argument must be an 8-bit immediate"); - break; - default: - gcc_unreachable (); - } - return const0_rtx; - } - } - else - { - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - /* If we aren't optimizing, only allow one memory operand to - be generated. */ - if (memory_operand (op, mode)) - num_memory++; - - if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) - { - if (optimize || !match || num_memory > 1) - op = copy_to_mode_reg (mode, op); - } - else - { - op = copy_to_reg (op); - op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); - } - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 1: - pat = GEN_FCN (icode) (real_target, args[0].op); - break; - case 2: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); - break; - case 3: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, - args[2].op); - break; - case 4: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, - args[2].op, args[3].op); - break; - default: - gcc_unreachable (); - } - - if (! pat) - return 0; - - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of special insns - with variable number of operands. */ - -static rtx -ix86_expand_special_args_builtin (const struct builtin_description *d, - tree exp, rtx target) -{ - tree arg; - rtx pat, op; - unsigned int i, nargs, arg_adjust, memory; - struct - { - rtx op; - enum machine_mode mode; - } args[3]; - enum insn_code icode = d->icode; - bool last_arg_constant = false; - const struct insn_data *insn_p = &insn_data[icode]; - enum machine_mode tmode = insn_p->operand[0].mode; - enum { load, store } klass; - - switch ((enum ix86_special_builtin_type) d->flag) - { - case VOID_FTYPE_VOID: - emit_insn (GEN_FCN (icode) (target)); - return 0; - case V2DI_FTYPE_PV2DI: - case V32QI_FTYPE_PCCHAR: - case V16QI_FTYPE_PCCHAR: - case V8SF_FTYPE_PCV4SF: - case V8SF_FTYPE_PCFLOAT: - case V4SF_FTYPE_PCFLOAT: - case V4DF_FTYPE_PCV2DF: - case V4DF_FTYPE_PCDOUBLE: - case V2DF_FTYPE_PCDOUBLE: - case VOID_FTYPE_PVOID: - nargs = 1; - klass = load; - memory = 0; - break; - case VOID_FTYPE_PV2SF_V4SF: - case VOID_FTYPE_PV4DI_V4DI: - case VOID_FTYPE_PV2DI_V2DI: - case VOID_FTYPE_PCHAR_V32QI: - case VOID_FTYPE_PCHAR_V16QI: - case VOID_FTYPE_PFLOAT_V8SF: - case VOID_FTYPE_PFLOAT_V4SF: - case VOID_FTYPE_PDOUBLE_V4DF: - case VOID_FTYPE_PDOUBLE_V2DF: - case VOID_FTYPE_PDI_DI: - case VOID_FTYPE_PINT_INT: - nargs = 1; - klass = store; - /* Reserve memory operand for target. */ - memory = ARRAY_SIZE (args); - break; - case V4SF_FTYPE_V4SF_PCV2SF: - case V2DF_FTYPE_V2DF_PCDOUBLE: - nargs = 2; - klass = load; - memory = 1; - break; - case V8SF_FTYPE_PCV8SF_V8SF: - case V4DF_FTYPE_PCV4DF_V4DF: - case V4SF_FTYPE_PCV4SF_V4SF: - case V2DF_FTYPE_PCV2DF_V2DF: - nargs = 2; - klass = load; - memory = 0; - break; - case VOID_FTYPE_PV8SF_V8SF_V8SF: - case VOID_FTYPE_PV4DF_V4DF_V4DF: - case VOID_FTYPE_PV4SF_V4SF_V4SF: - case VOID_FTYPE_PV2DF_V2DF_V2DF: - nargs = 2; - klass = store; - /* Reserve memory operand for target. */ - memory = ARRAY_SIZE (args); - break; - case VOID_FTYPE_UINT_UINT_UINT: - case VOID_FTYPE_UINT64_UINT_UINT: - case UCHAR_FTYPE_UINT_UINT_UINT: - case UCHAR_FTYPE_UINT64_UINT_UINT: - nargs = 3; - klass = load; - memory = ARRAY_SIZE (args); - last_arg_constant = true; - break; - default: - gcc_unreachable (); - } - - gcc_assert (nargs <= ARRAY_SIZE (args)); - - if (klass == store) - { - arg = CALL_EXPR_ARG (exp, 0); - op = expand_normal (arg); - gcc_assert (target == 0); - target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op)); - arg_adjust = 1; - } - else - { - arg_adjust = 0; - if (optimize - || target == 0 - || GET_MODE (target) != tmode - || ! (*insn_p->operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - } - - for (i = 0; i < nargs; i++) - { - enum machine_mode mode = insn_p->operand[i + 1].mode; - bool match; - - arg = CALL_EXPR_ARG (exp, i + arg_adjust); - op = expand_normal (arg); - match = (*insn_p->operand[i + 1].predicate) (op, mode); - - if (last_arg_constant && (i + 1) == nargs) - { - if (!match) - { - if (icode == CODE_FOR_lwp_lwpvalsi3 - || icode == CODE_FOR_lwp_lwpinssi3 - || icode == CODE_FOR_lwp_lwpvaldi3 - || icode == CODE_FOR_lwp_lwpinsdi3) - error ("the last argument must be a 32-bit immediate"); - else - error ("the last argument must be an 8-bit immediate"); - return const0_rtx; - } - } - else - { - if (i == memory) - { - /* This must be the memory operand. */ - op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op)); - gcc_assert (GET_MODE (op) == mode - || GET_MODE (op) == VOIDmode); - } - else - { - /* This must be register. */ - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - gcc_assert (GET_MODE (op) == mode - || GET_MODE (op) == VOIDmode); - op = copy_to_mode_reg (mode, op); - } - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 1: - pat = GEN_FCN (icode) (target, args[0].op); - break; - case 2: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op); - break; - case 3: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); - break; - default: - gcc_unreachable (); - } - - if (! pat) - return 0; - emit_insn (pat); - return klass == store ? 0 : target; -} - -/* Return the integer constant in ARG. Constrain it to be in the range - of the subparts of VEC_TYPE; issue an error if not. */ - -static int -get_element_number (tree vec_type, tree arg) -{ - unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; - - if (!host_integerp (arg, 1) - || (elt = tree_low_cst (arg, 1), elt > max)) - { - error ("selector must be an integer constant in the range 0..%wi", max); - return 0; - } - - return elt; -} - -/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around - ix86_expand_vector_init. We DO have language-level syntax for this, in - the form of (type){ init-list }. Except that since we can't place emms - instructions from inside the compiler, we can't allow the use of MMX - registers unless the user explicitly asks for it. So we do *not* define - vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead - we have builtins invoked by mmintrin.h that gives us license to emit - these sorts of instructions. */ - -static rtx -ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) -{ - enum machine_mode tmode = TYPE_MODE (type); - enum machine_mode inner_mode = GET_MODE_INNER (tmode); - int i, n_elt = GET_MODE_NUNITS (tmode); - rtvec v = rtvec_alloc (n_elt); - - gcc_assert (VECTOR_MODE_P (tmode)); - gcc_assert (call_expr_nargs (exp) == n_elt); - - for (i = 0; i < n_elt; ++i) - { - rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); - RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); - } - - if (!target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - - ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); - return target; -} - -/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around - ix86_expand_vector_extract. They would be redundant (for non-MMX) if we - had a language-level syntax for referencing vector elements. */ - -static rtx -ix86_expand_vec_ext_builtin (tree exp, rtx target) -{ - enum machine_mode tmode, mode0; - tree arg0, arg1; - int elt; - rtx op0; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - - op0 = expand_normal (arg0); - elt = get_element_number (TREE_TYPE (arg0), arg1); - - tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); - mode0 = TYPE_MODE (TREE_TYPE (arg0)); - gcc_assert (VECTOR_MODE_P (mode0)); - - op0 = force_reg (mode0, op0); - - if (optimize || !target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - - ix86_expand_vector_extract (true, target, op0, elt); - - return target; -} - -/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around - ix86_expand_vector_set. They would be redundant (for non-MMX) if we had - a language-level syntax for referencing vector elements. */ - -static rtx -ix86_expand_vec_set_builtin (tree exp) -{ - enum machine_mode tmode, mode1; - tree arg0, arg1, arg2; - int elt; - rtx op0, op1, target; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - - tmode = TYPE_MODE (TREE_TYPE (arg0)); - mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); - gcc_assert (VECTOR_MODE_P (tmode)); - - op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); - op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); - elt = get_element_number (TREE_TYPE (arg0), arg2); - - if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) - op1 = convert_modes (mode1, GET_MODE (op1), op1, true); - - op0 = force_reg (tmode, op0); - op1 = force_reg (mode1, op1); - - /* OP0 is the source of these builtin functions and shouldn't be - modified. Create a copy, use it and return it as target. */ - target = gen_reg_rtx (tmode); - emit_move_insn (target, op0); - ix86_expand_vector_set (true, target, op1, elt); - - return target; -} - -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient - (and in mode MODE if that's convenient). - SUBTARGET may be used as the target for computing one of EXP's operands. - IGNORE is nonzero if the value is to be ignored. */ - -static rtx -ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) -{ - const struct builtin_description *d; - size_t i; - enum insn_code icode; - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - tree arg0, arg1, arg2; - rtx op0, op1, op2, pat; - enum machine_mode mode0, mode1, mode2; - unsigned int fcode = DECL_FUNCTION_CODE (fndecl); - - /* Determine whether the builtin function is available under the current ISA. - Originally the builtin was not created if it wasn't applicable to the - current ISA based on the command line switches. With function specific - options, we need to check in the context of the function making the call - whether it is supported. */ - if (ix86_builtins_isa[fcode].isa - && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags)) - { - char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL, - NULL, NULL, false); - - if (!opts) - error ("%qE needs unknown isa option", fndecl); - else - { - gcc_assert (opts != NULL); - error ("%qE needs isa option %s", fndecl, opts); - free (opts); - } - return const0_rtx; - } - - switch (fcode) - { - case IX86_BUILTIN_MASKMOVQ: - case IX86_BUILTIN_MASKMOVDQU: - icode = (fcode == IX86_BUILTIN_MASKMOVQ - ? CODE_FOR_mmx_maskmovq - : CODE_FOR_sse2_maskmovdqu); - /* Note the arg order is different from the operand order. */ - arg1 = CALL_EXPR_ARG (exp, 0); - arg2 = CALL_EXPR_ARG (exp, 1); - arg0 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - mode0 = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - mode2 = insn_data[icode].operand[2].mode; - - op0 = force_reg (Pmode, op0); - op0 = gen_rtx_MEM (mode1, op0); - - if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - pat = GEN_FCN (icode) (op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return 0; - - case IX86_BUILTIN_LDMXCSR: - op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); - target = assign_386_stack_local (SImode, SLOT_VIRTUAL); - emit_move_insn (target, op0); - emit_insn (gen_sse_ldmxcsr (target)); - return 0; - - case IX86_BUILTIN_STMXCSR: - target = assign_386_stack_local (SImode, SLOT_VIRTUAL); - emit_insn (gen_sse_stmxcsr (target)); - return copy_to_mode_reg (SImode, target); - - case IX86_BUILTIN_CLFLUSH: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_sse2_clflush; - if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) - op0 = copy_to_mode_reg (Pmode, op0); - - emit_insn (gen_sse2_clflush (op0)); - return 0; - - case IX86_BUILTIN_MONITOR: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - if (!REG_P (op0)) - op0 = copy_to_mode_reg (Pmode, op0); - if (!REG_P (op1)) - op1 = copy_to_mode_reg (SImode, op1); - if (!REG_P (op2)) - op2 = copy_to_mode_reg (SImode, op2); - emit_insn ((*ix86_gen_monitor) (op0, op1, op2)); - return 0; - - case IX86_BUILTIN_MWAIT: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - if (!REG_P (op1)) - op1 = copy_to_mode_reg (SImode, op1); - emit_insn (gen_sse3_mwait (op0, op1)); - return 0; - - case IX86_BUILTIN_VEC_INIT_V2SI: - case IX86_BUILTIN_VEC_INIT_V4HI: - case IX86_BUILTIN_VEC_INIT_V8QI: - return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); - - case IX86_BUILTIN_VEC_EXT_V2DF: - case IX86_BUILTIN_VEC_EXT_V2DI: - case IX86_BUILTIN_VEC_EXT_V4SF: - case IX86_BUILTIN_VEC_EXT_V4SI: - case IX86_BUILTIN_VEC_EXT_V8HI: - case IX86_BUILTIN_VEC_EXT_V2SI: - case IX86_BUILTIN_VEC_EXT_V4HI: - case IX86_BUILTIN_VEC_EXT_V16QI: - return ix86_expand_vec_ext_builtin (exp, target); - - case IX86_BUILTIN_VEC_SET_V2DI: - case IX86_BUILTIN_VEC_SET_V4SF: - case IX86_BUILTIN_VEC_SET_V4SI: - case IX86_BUILTIN_VEC_SET_V8HI: - case IX86_BUILTIN_VEC_SET_V4HI: - case IX86_BUILTIN_VEC_SET_V16QI: - return ix86_expand_vec_set_builtin (exp); - - case IX86_BUILTIN_INFQ: - { - REAL_VALUE_TYPE inf; - rtx tmp; - - real_inf (&inf); - tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode); - - tmp = validize_mem (force_const_mem (mode, tmp)); - - if (target == 0) - target = gen_reg_rtx (mode); - - emit_move_insn (target, tmp); - return target; - } - - case IX86_BUILTIN_LLWPCB: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_lwp_llwpcb; - if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) - op0 = copy_to_mode_reg (Pmode, op0); - emit_insn (gen_lwp_llwpcb (op0)); - return 0; - - case IX86_BUILTIN_SLWPCB: - icode = CODE_FOR_lwp_slwpcb; - if (!target - || ! (*insn_data[icode].operand[0].predicate) (target, Pmode)) - target = gen_reg_rtx (Pmode); - emit_insn (gen_lwp_slwpcb (target)); - return target; - - default: - break; - } - - for (i = 0, d = bdesc_special_args; - i < ARRAY_SIZE (bdesc_special_args); - i++, d++) - if (d->code == fcode) - return ix86_expand_special_args_builtin (d, exp, target); - - for (i = 0, d = bdesc_args; - i < ARRAY_SIZE (bdesc_args); - i++, d++) - if (d->code == fcode) - switch (fcode) - { - case IX86_BUILTIN_FABSQ: - case IX86_BUILTIN_COPYSIGNQ: - if (!TARGET_SSE2) - /* Emit a normal call if SSE2 isn't available. */ - return expand_call (exp, target, ignore); - default: - return ix86_expand_args_builtin (d, exp, target); - } - - for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) - if (d->code == fcode) - return ix86_expand_sse_comi (d, exp, target); - - for (i = 0, d = bdesc_pcmpestr; - i < ARRAY_SIZE (bdesc_pcmpestr); - i++, d++) - if (d->code == fcode) - return ix86_expand_sse_pcmpestr (d, exp, target); - - for (i = 0, d = bdesc_pcmpistr; - i < ARRAY_SIZE (bdesc_pcmpistr); - i++, d++) - if (d->code == fcode) - return ix86_expand_sse_pcmpistr (d, exp, target); - - for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) - if (d->code == fcode) - return ix86_expand_multi_arg_builtin (d->icode, exp, target, - (enum multi_arg_type)d->flag, - d->comparison); - - gcc_unreachable (); -} - -/* Returns a function decl for a vectorized version of the builtin function - with builtin function code FN and the result vector type TYPE, or NULL_TREE - if it is not available. */ - -static tree -ix86_builtin_vectorized_function (unsigned int fn, tree type_out, - tree type_in) -{ - enum machine_mode in_mode, out_mode; - int in_n, out_n; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - - switch (fn) - { - case BUILT_IN_SQRT: - if (out_mode == DFmode && out_n == 2 - && in_mode == DFmode && in_n == 2) - return ix86_builtins[IX86_BUILTIN_SQRTPD]; - break; - - case BUILT_IN_SQRTF: - if (out_mode == SFmode && out_n == 4 - && in_mode == SFmode && in_n == 4) - return ix86_builtins[IX86_BUILTIN_SQRTPS_NR]; - break; - - case BUILT_IN_LRINT: - if (out_mode == SImode && out_n == 4 - && in_mode == DFmode && in_n == 2) - return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX]; - break; - - case BUILT_IN_LRINTF: - if (out_mode == SImode && out_n == 4 - && in_mode == SFmode && in_n == 4) - return ix86_builtins[IX86_BUILTIN_CVTPS2DQ]; - break; - - default: - ; - } - - /* Dispatch to a handler for a vectorization library. */ - if (ix86_veclib_handler) - return (*ix86_veclib_handler)(fn, type_out, type_in); - - return NULL_TREE; -} - -/* Handler for an SVML-style interface to - a library with vectorized intrinsics. */ - -static tree -ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in) -{ - char name[20]; - tree fntype, new_fndecl, args; - unsigned arity; - const char *bname; - enum machine_mode el_mode, in_mode; - int n, in_n; - - /* The SVML is suitable for unsafe math only. */ - if (!flag_unsafe_math_optimizations) - return NULL_TREE; - - el_mode = TYPE_MODE (TREE_TYPE (type_out)); - n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - if (el_mode != in_mode - || n != in_n) - return NULL_TREE; - - switch (fn) - { - case BUILT_IN_EXP: - case BUILT_IN_LOG: - case BUILT_IN_LOG10: - case BUILT_IN_POW: - case BUILT_IN_TANH: - case BUILT_IN_TAN: - case BUILT_IN_ATAN: - case BUILT_IN_ATAN2: - case BUILT_IN_ATANH: - case BUILT_IN_CBRT: - case BUILT_IN_SINH: - case BUILT_IN_SIN: - case BUILT_IN_ASINH: - case BUILT_IN_ASIN: - case BUILT_IN_COSH: - case BUILT_IN_COS: - case BUILT_IN_ACOSH: - case BUILT_IN_ACOS: - if (el_mode != DFmode || n != 2) - return NULL_TREE; - break; - - case BUILT_IN_EXPF: - case BUILT_IN_LOGF: - case BUILT_IN_LOG10F: - case BUILT_IN_POWF: - case BUILT_IN_TANHF: - case BUILT_IN_TANF: - case BUILT_IN_ATANF: - case BUILT_IN_ATAN2F: - case BUILT_IN_ATANHF: - case BUILT_IN_CBRTF: - case BUILT_IN_SINHF: - case BUILT_IN_SINF: - case BUILT_IN_ASINHF: - case BUILT_IN_ASINF: - case BUILT_IN_COSHF: - case BUILT_IN_COSF: - case BUILT_IN_ACOSHF: - case BUILT_IN_ACOSF: - if (el_mode != SFmode || n != 4) - return NULL_TREE; - break; - - default: - return NULL_TREE; - } - - bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); - - if (fn == BUILT_IN_LOGF) - strcpy (name, "vmlsLn4"); - else if (fn == BUILT_IN_LOG) - strcpy (name, "vmldLn2"); - else if (n == 4) - { - sprintf (name, "vmls%s", bname+10); - name[strlen (name)-1] = '4'; - } - else - sprintf (name, "vmld%s2", bname+10); - - /* Convert to uppercase. */ - name[4] &= ~0x20; - - arity = 0; - for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; - args = TREE_CHAIN (args)) - arity++; - - if (arity == 1) - fntype = build_function_type_list (type_out, type_in, NULL); - else - fntype = build_function_type_list (type_out, type_in, type_in, NULL); - - /* Build a function declaration for the vectorized function. */ - new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); - TREE_PUBLIC (new_fndecl) = 1; - DECL_EXTERNAL (new_fndecl) = 1; - DECL_IS_NOVOPS (new_fndecl) = 1; - TREE_READONLY (new_fndecl) = 1; - - return new_fndecl; -} - -/* Handler for an ACML-style interface to - a library with vectorized intrinsics. */ - -static tree -ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) -{ - char name[20] = "__vr.._"; - tree fntype, new_fndecl, args; - unsigned arity; - const char *bname; - enum machine_mode el_mode, in_mode; - int n, in_n; - - /* The ACML is 64bits only and suitable for unsafe math only as - it does not correctly support parts of IEEE with the required - precision such as denormals. */ - if (!TARGET_64BIT - || !flag_unsafe_math_optimizations) - return NULL_TREE; - - el_mode = TYPE_MODE (TREE_TYPE (type_out)); - n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - if (el_mode != in_mode - || n != in_n) - return NULL_TREE; - - switch (fn) - { - case BUILT_IN_SIN: - case BUILT_IN_COS: - case BUILT_IN_EXP: - case BUILT_IN_LOG: - case BUILT_IN_LOG2: - case BUILT_IN_LOG10: - name[4] = 'd'; - name[5] = '2'; - if (el_mode != DFmode - || n != 2) - return NULL_TREE; - break; - - case BUILT_IN_SINF: - case BUILT_IN_COSF: - case BUILT_IN_EXPF: - case BUILT_IN_POWF: - case BUILT_IN_LOGF: - case BUILT_IN_LOG2F: - case BUILT_IN_LOG10F: - name[4] = 's'; - name[5] = '4'; - if (el_mode != SFmode - || n != 4) - return NULL_TREE; - break; - - default: - return NULL_TREE; - } - - bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); - sprintf (name + 7, "%s", bname+10); - - arity = 0; - for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; - args = TREE_CHAIN (args)) - arity++; - - if (arity == 1) - fntype = build_function_type_list (type_out, type_in, NULL); - else - fntype = build_function_type_list (type_out, type_in, type_in, NULL); - - /* Build a function declaration for the vectorized function. */ - new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); - TREE_PUBLIC (new_fndecl) = 1; - DECL_EXTERNAL (new_fndecl) = 1; - DECL_IS_NOVOPS (new_fndecl) = 1; - TREE_READONLY (new_fndecl) = 1; - - return new_fndecl; -} - - -/* Returns a decl of a function that implements conversion of an integer vector - into a floating-point vector, or vice-versa. TYPE is the type of the integer - side of the conversion. - Return NULL_TREE if it is not available. */ - -static tree -ix86_vectorize_builtin_conversion (unsigned int code, tree type) -{ - if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE - /* There are only conversions from/to signed integers. */ - || TYPE_UNSIGNED (TREE_TYPE (type))) - return NULL_TREE; - - switch (code) - { - case FLOAT_EXPR: - switch (TYPE_MODE (type)) - { - case V4SImode: - return ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; - default: - return NULL_TREE; - } - - case FIX_TRUNC_EXPR: - switch (TYPE_MODE (type)) - { - case V4SImode: - return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; - default: - return NULL_TREE; - } - default: - return NULL_TREE; - - } -} - -/* Returns a code for a target-specific builtin that implements - reciprocal of the function, or NULL_TREE if not available. */ - -static tree -ix86_builtin_reciprocal (unsigned int fn, bool md_fn, - bool sqrt ATTRIBUTE_UNUSED) -{ - if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () - && flag_finite_math_only && !flag_trapping_math - && flag_unsafe_math_optimizations)) - return NULL_TREE; - - if (md_fn) - /* Machine dependent builtins. */ - switch (fn) - { - /* Vectorized version of sqrt to rsqrt conversion. */ - case IX86_BUILTIN_SQRTPS_NR: - return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR]; - - default: - return NULL_TREE; - } - else - /* Normal builtins. */ - switch (fn) - { - /* Sqrt to rsqrt conversion. */ - case BUILT_IN_SQRTF: - return ix86_builtins[IX86_BUILTIN_RSQRTF]; - - default: - return NULL_TREE; - } -} - -/* Store OPERAND to the memory after reload is completed. This means - that we can't easily use assign_stack_local. */ -rtx -ix86_force_to_memory (enum machine_mode mode, rtx operand) -{ - rtx result; - - gcc_assert (reload_completed); - if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE) - { - result = gen_rtx_MEM (mode, - gen_rtx_PLUS (Pmode, - stack_pointer_rtx, - GEN_INT (-RED_ZONE_SIZE))); - emit_move_insn (result, operand); - } - else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT) - { - switch (mode) - { - case HImode: - case SImode: - operand = gen_lowpart (DImode, operand); - /* FALLTHRU */ - case DImode: - emit_insn ( - gen_rtx_SET (VOIDmode, - gen_rtx_MEM (DImode, - gen_rtx_PRE_DEC (DImode, - stack_pointer_rtx)), - operand)); - break; - default: - gcc_unreachable (); - } - result = gen_rtx_MEM (mode, stack_pointer_rtx); - } - else - { - switch (mode) - { - case DImode: - { - rtx operands[2]; - split_di (&operand, 1, operands, operands + 1); - emit_insn ( - gen_rtx_SET (VOIDmode, - gen_rtx_MEM (SImode, - gen_rtx_PRE_DEC (Pmode, - stack_pointer_rtx)), - operands[1])); - emit_insn ( - gen_rtx_SET (VOIDmode, - gen_rtx_MEM (SImode, - gen_rtx_PRE_DEC (Pmode, - stack_pointer_rtx)), - operands[0])); - } - break; - case HImode: - /* Store HImodes as SImodes. */ - operand = gen_lowpart (SImode, operand); - /* FALLTHRU */ - case SImode: - emit_insn ( - gen_rtx_SET (VOIDmode, - gen_rtx_MEM (GET_MODE (operand), - gen_rtx_PRE_DEC (SImode, - stack_pointer_rtx)), - operand)); - break; - default: - gcc_unreachable (); - } - result = gen_rtx_MEM (mode, stack_pointer_rtx); - } - return result; -} - -/* Free operand from the memory. */ -void -ix86_free_from_memory (enum machine_mode mode) -{ - if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI) - { - int size; - - if (mode == DImode || TARGET_64BIT) - size = 8; - else - size = 4; - /* Use LEA to deallocate stack space. In peephole2 it will be converted - to pop or add instruction if registers are available. */ - emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, - gen_rtx_PLUS (Pmode, stack_pointer_rtx, - GEN_INT (size)))); - } -} - -/* Put float CONST_DOUBLE in the constant pool instead of fp regs. - QImode must go into class Q_REGS. - Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and - movdf to do mem-to-mem moves through integer regs. */ -enum reg_class -ix86_preferred_reload_class (rtx x, enum reg_class regclass) -{ - enum machine_mode mode = GET_MODE (x); - - /* We're only allowed to return a subclass of CLASS. Many of the - following checks fail for NO_REGS, so eliminate that early. */ - if (regclass == NO_REGS) - return NO_REGS; - - /* All classes can load zeros. */ - if (x == CONST0_RTX (mode)) - return regclass; - - /* Force constants into memory if we are loading a (nonzero) constant into - an MMX or SSE register. This is because there are no MMX/SSE instructions - to load from a constant. */ - if (CONSTANT_P (x) - && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass))) - return NO_REGS; - - /* Prefer SSE regs only, if we can use them for math. */ - if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) - return SSE_CLASS_P (regclass) ? regclass : NO_REGS; - - /* Floating-point constants need more complex checks. */ - if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) - { - /* General regs can load everything. */ - if (reg_class_subset_p (regclass, GENERAL_REGS)) - return regclass; - - /* Floats can load 0 and 1 plus some others. Note that we eliminated - zero above. We only want to wind up preferring 80387 registers if - we plan on doing computation with them. */ - if (TARGET_80387 - && standard_80387_constant_p (x)) - { - /* Limit class to non-sse. */ - if (regclass == FLOAT_SSE_REGS) - return FLOAT_REGS; - if (regclass == FP_TOP_SSE_REGS) - return FP_TOP_REG; - if (regclass == FP_SECOND_SSE_REGS) - return FP_SECOND_REG; - if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS) - return regclass; - } - - return NO_REGS; - } - - /* Generally when we see PLUS here, it's the function invariant - (plus soft-fp const_int). Which can only be computed into general - regs. */ - if (GET_CODE (x) == PLUS) - return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS; - - /* QImode constants are easy to load, but non-constant QImode data - must go into Q_REGS. */ - if (GET_MODE (x) == QImode && !CONSTANT_P (x)) - { - if (reg_class_subset_p (regclass, Q_REGS)) - return regclass; - if (reg_class_subset_p (Q_REGS, regclass)) - return Q_REGS; - return NO_REGS; - } - - return regclass; -} - -/* Discourage putting floating-point values in SSE registers unless - SSE math is being used, and likewise for the 387 registers. */ -enum reg_class -ix86_preferred_output_reload_class (rtx x, enum reg_class regclass) -{ - enum machine_mode mode = GET_MODE (x); - - /* Restrict the output reload class to the register bank that we are doing - math on. If we would like not to return a subset of CLASS, reject this - alternative: if reload cannot do this, it will still use its choice. */ - mode = GET_MODE (x); - if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) - return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS; - - if (X87_FLOAT_MODE_P (mode)) - { - if (regclass == FP_TOP_SSE_REGS) - return FP_TOP_REG; - else if (regclass == FP_SECOND_SSE_REGS) - return FP_SECOND_REG; - else - return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; - } - - return regclass; -} - -static enum reg_class -ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass, - enum machine_mode mode, - secondary_reload_info *sri ATTRIBUTE_UNUSED) -{ - /* QImode spills from non-QI registers require - intermediate register on 32bit targets. */ - if (!in_p && mode == QImode && !TARGET_64BIT - && (rclass == GENERAL_REGS - || rclass == LEGACY_REGS - || rclass == INDEX_REGS)) - { - int regno; - - if (REG_P (x)) - regno = REGNO (x); - else - regno = -1; - - if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) - regno = true_regnum (x); - - /* Return Q_REGS if the operand is in memory. */ - if (regno == -1) - return Q_REGS; - } - - return NO_REGS; -} - -/* If we are copying between general and FP registers, we need a memory - location. The same is true for SSE and MMX registers. - - To optimize register_move_cost performance, allow inline variant. - - The macro can't work reliably when one of the CLASSES is class containing - registers from multiple units (SSE, MMX, integer). We avoid this by never - combining those units in single alternative in the machine description. - Ensure that this constraint holds to avoid unexpected surprises. - - When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not - enforce these sanity checks. */ - -static inline int -inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, - enum machine_mode mode, int strict) -{ - if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) - || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) - || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) - || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) - || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) - || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) - { - gcc_assert (!strict); - return true; - } - - if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) - return true; - - /* ??? This is a lie. We do have moves between mmx/general, and for - mmx/sse2. But by saying we need secondary memory we discourage the - register allocator from using the mmx registers unless needed. */ - if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) - return true; - - if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) - { - /* SSE1 doesn't have any direct moves from other classes. */ - if (!TARGET_SSE2) - return true; - - /* If the target says that inter-unit moves are more expensive - than moving through memory, then don't generate them. */ - if (!TARGET_INTER_UNIT_MOVES) - return true; - - /* Between SSE and general, we have moves no larger than word size. */ - if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) - return true; - } - - return false; -} - -int -ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, - enum machine_mode mode, int strict) -{ - return inline_secondary_memory_needed (class1, class2, mode, strict); -} - -/* Return true if the registers in CLASS cannot represent the change from - modes FROM to TO. */ - -bool -ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, - enum reg_class regclass) -{ - if (from == to) - return false; - - /* x87 registers can't do subreg at all, as all values are reformatted - to extended precision. */ - if (MAYBE_FLOAT_CLASS_P (regclass)) - return true; - - if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) - { - /* Vector registers do not support QI or HImode loads. If we don't - disallow a change to these modes, reload will assume it's ok to - drop the subreg from (subreg:SI (reg:HI 100) 0). This affects - the vec_dupv4hi pattern. */ - if (GET_MODE_SIZE (from) < 4) - return true; - - /* Vector registers do not support subreg with nonzero offsets, which - are otherwise valid for integer registers. Since we can't see - whether we have a nonzero offset from here, prohibit all - nonparadoxical subregs changing size. */ - if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) - return true; - } - - return false; -} - -/* Return the cost of moving data of mode M between a - register and memory. A value of 2 is the default; this cost is - relative to those in `REGISTER_MOVE_COST'. - - This function is used extensively by register_move_cost that is used to - build tables at startup. Make it inline in this case. - When IN is 2, return maximum of in and out move cost. - - If moving between registers and memory is more expensive than - between two registers, you should define this macro to express the - relative cost. - - Model also increased moving costs of QImode registers in non - Q_REGS classes. - */ -static inline int -inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass, - int in) -{ - int cost; - if (FLOAT_CLASS_P (regclass)) - { - int index; - switch (mode) - { - case SFmode: - index = 0; - break; - case DFmode: - index = 1; - break; - case XFmode: - index = 2; - break; - default: - return 100; - } - if (in == 2) - return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); - return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; - } - if (SSE_CLASS_P (regclass)) - { - int index; - switch (GET_MODE_SIZE (mode)) - { - case 4: - index = 0; - break; - case 8: - index = 1; - break; - case 16: - index = 2; - break; - default: - return 100; - } - if (in == 2) - return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); - return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; - } - if (MMX_CLASS_P (regclass)) - { - int index; - switch (GET_MODE_SIZE (mode)) - { - case 4: - index = 0; - break; - case 8: - index = 1; - break; - default: - return 100; - } - if (in) - return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); - return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; - } - switch (GET_MODE_SIZE (mode)) - { - case 1: - if (Q_CLASS_P (regclass) || TARGET_64BIT) - { - if (!in) - return ix86_cost->int_store[0]; - if (TARGET_PARTIAL_REG_DEPENDENCY - && optimize_function_for_speed_p (cfun)) - cost = ix86_cost->movzbl_load; - else - cost = ix86_cost->int_load[0]; - if (in == 2) - return MAX (cost, ix86_cost->int_store[0]); - return cost; - } - else - { - if (in == 2) - return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); - if (in) - return ix86_cost->movzbl_load; - else - return ix86_cost->int_store[0] + 4; - } - break; - case 2: - if (in == 2) - return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); - return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; - default: - /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ - if (mode == TFmode) - mode = XFmode; - if (in == 2) - cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]); - else if (in) - cost = ix86_cost->int_load[2]; - else - cost = ix86_cost->int_store[2]; - return (cost * (((int) GET_MODE_SIZE (mode) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); - } -} - -int -ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in) -{ - return inline_memory_move_cost (mode, regclass, in); -} - - -/* Return the cost of moving data from a register in class CLASS1 to - one in class CLASS2. - - It is not required that the cost always equal 2 when FROM is the same as TO; - on some machines it is expensive to move between registers if they are not - general registers. */ - -int -ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, - enum reg_class class2) -{ - /* In case we require secondary memory, compute cost of the store followed - by load. In order to avoid bad register allocation choices, we need - for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ - - if (inline_secondary_memory_needed (class1, class2, mode, 0)) - { - int cost = 1; - - cost += inline_memory_move_cost (mode, class1, 2); - cost += inline_memory_move_cost (mode, class2, 2); - - /* In case of copying from general_purpose_register we may emit multiple - stores followed by single load causing memory size mismatch stall. - Count this as arbitrarily high cost of 20. */ - if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) - cost += 20; - - /* In the case of FP/MMX moves, the registers actually overlap, and we - have to switch modes in order to treat them differently. */ - if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) - || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) - cost += 20; - - return cost; - } - - /* Moves between SSE/MMX and integer unit are expensive. */ - if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) - || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) - - /* ??? By keeping returned value relatively high, we limit the number - of moves between integer and MMX/SSE registers for all targets. - Additionally, high value prevents problem with x86_modes_tieable_p(), - where integer modes in MMX/SSE registers are not tieable - because of missing QImode and HImode moves to, from or between - MMX/SSE registers. */ - return MAX (8, ix86_cost->mmxsse_to_integer); - - if (MAYBE_FLOAT_CLASS_P (class1)) - return ix86_cost->fp_move; - if (MAYBE_SSE_CLASS_P (class1)) - return ix86_cost->sse_move; - if (MAYBE_MMX_CLASS_P (class1)) - return ix86_cost->mmx_move; - return 2; -} - -/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ - -bool -ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) -{ - /* Flags and only flags can only hold CCmode values. */ - if (CC_REGNO_P (regno)) - return GET_MODE_CLASS (mode) == MODE_CC; - if (GET_MODE_CLASS (mode) == MODE_CC - || GET_MODE_CLASS (mode) == MODE_RANDOM - || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) - return 0; - if (FP_REGNO_P (regno)) - return VALID_FP_MODE_P (mode); - if (SSE_REGNO_P (regno)) - { - /* We implement the move patterns for all vector modes into and - out of SSE registers, even when no operation instructions - are available. OImode move is available only when AVX is - enabled. */ - return ((TARGET_AVX && mode == OImode) - || VALID_AVX256_REG_MODE (mode) - || VALID_SSE_REG_MODE (mode) - || VALID_SSE2_REG_MODE (mode) - || VALID_MMX_REG_MODE (mode) - || VALID_MMX_REG_MODE_3DNOW (mode)); - } - if (MMX_REGNO_P (regno)) - { - /* We implement the move patterns for 3DNOW modes even in MMX mode, - so if the register is available at all, then we can move data of - the given mode into or out of it. */ - return (VALID_MMX_REG_MODE (mode) - || VALID_MMX_REG_MODE_3DNOW (mode)); - } - - if (mode == QImode) - { - /* Take care for QImode values - they can be in non-QI regs, - but then they do cause partial register stalls. */ - if (regno <= BX_REG || TARGET_64BIT) - return 1; - if (!TARGET_PARTIAL_REG_STALL) - return 1; - return reload_in_progress || reload_completed; - } - /* We handle both integer and floats in the general purpose registers. */ - else if (VALID_INT_MODE_P (mode)) - return 1; - else if (VALID_FP_MODE_P (mode)) - return 1; - else if (VALID_DFP_MODE_P (mode)) - return 1; - /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go - on to use that value in smaller contexts, this can easily force a - pseudo to be allocated to GENERAL_REGS. Since this is no worse than - supporting DImode, allow it. */ - else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) - return 1; - - return 0; -} - -/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a - tieable integer mode. */ - -static bool -ix86_tieable_integer_mode_p (enum machine_mode mode) -{ - switch (mode) - { - case HImode: - case SImode: - return true; - - case QImode: - return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; - - case DImode: - return TARGET_64BIT; - - default: - return false; - } -} - -/* Return true if MODE1 is accessible in a register that can hold MODE2 - without copying. That is, all register classes that can hold MODE2 - can also hold MODE1. */ - -bool -ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) -{ - if (mode1 == mode2) - return true; - - if (ix86_tieable_integer_mode_p (mode1) - && ix86_tieable_integer_mode_p (mode2)) - return true; - - /* MODE2 being XFmode implies fp stack or general regs, which means we - can tie any smaller floating point modes to it. Note that we do not - tie this with TFmode. */ - if (mode2 == XFmode) - return mode1 == SFmode || mode1 == DFmode; - - /* MODE2 being DFmode implies fp stack, general or sse regs, which means - that we can tie it with SFmode. */ - if (mode2 == DFmode) - return mode1 == SFmode; - - /* If MODE2 is only appropriate for an SSE register, then tie with - any other mode acceptable to SSE registers. */ - if (GET_MODE_SIZE (mode2) == 16 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 16 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); - - /* If MODE2 is appropriate for an MMX register, then tie - with any other mode acceptable to MMX registers. */ - if (GET_MODE_SIZE (mode2) == 8 - && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 8 - && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); - - return false; -} - -/* Compute a (partial) cost for rtx X. Return true if the complete - cost has been computed, and false if subexpressions should be - scanned. In either case, *TOTAL contains the cost result. */ - -static bool -ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed) -{ - enum rtx_code outer_code = (enum rtx_code) outer_code_i; - enum machine_mode mode = GET_MODE (x); - const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; - - switch (code) - { - case CONST_INT: - case CONST: - case LABEL_REF: - case SYMBOL_REF: - if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) - *total = 3; - else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) - *total = 2; - else if (flag_pic && SYMBOLIC_CONST (x) - && (!TARGET_64BIT - || (!GET_CODE (x) != LABEL_REF - && (GET_CODE (x) != SYMBOL_REF - || !SYMBOL_REF_LOCAL_P (x))))) - *total = 1; - else - *total = 0; - return true; - - case CONST_DOUBLE: - if (mode == VOIDmode) - *total = 0; - else - switch (standard_80387_constant_p (x)) - { - case 1: /* 0.0 */ - *total = 1; - break; - default: /* Other constants */ - *total = 2; - break; - case 0: - case -1: - /* Start with (MEM (SYMBOL_REF)), since that's where - it'll probably end up. Add a penalty for size. */ - *total = (COSTS_N_INSNS (1) - + (flag_pic != 0 && !TARGET_64BIT) - + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); - break; - } - return true; - - case ZERO_EXTEND: - /* The zero extensions is often completely free on x86_64, so make - it as cheap as possible. */ - if (TARGET_64BIT && mode == DImode - && GET_MODE (XEXP (x, 0)) == SImode) - *total = 1; - else if (TARGET_ZERO_EXTEND_WITH_AND) - *total = cost->add; - else - *total = cost->movzx; - return false; - - case SIGN_EXTEND: - *total = cost->movsx; - return false; - - case ASHIFT: - if (CONST_INT_P (XEXP (x, 1)) - && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) - { - HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); - if (value == 1) - { - *total = cost->add; - return false; - } - if ((value == 2 || value == 3) - && cost->lea <= cost->shift_const) - { - *total = cost->lea; - return false; - } - } - /* FALLTHRU */ - - case ROTATE: - case ASHIFTRT: - case LSHIFTRT: - case ROTATERT: - if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) - { - if (CONST_INT_P (XEXP (x, 1))) - { - if (INTVAL (XEXP (x, 1)) > 32) - *total = cost->shift_const + COSTS_N_INSNS (2); - else - *total = cost->shift_const * 2; - } - else - { - if (GET_CODE (XEXP (x, 1)) == AND) - *total = cost->shift_var * 2; - else - *total = cost->shift_var * 6 + COSTS_N_INSNS (2); - } - } - else - { - if (CONST_INT_P (XEXP (x, 1))) - *total = cost->shift_const; - else - *total = cost->shift_var; - } - return false; - - case MULT: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - { - /* ??? SSE scalar cost should be used here. */ - *total = cost->fmul; - return false; - } - else if (X87_FLOAT_MODE_P (mode)) - { - *total = cost->fmul; - return false; - } - else if (FLOAT_MODE_P (mode)) - { - /* ??? SSE vector cost should be used here. */ - *total = cost->fmul; - return false; - } - else - { - rtx op0 = XEXP (x, 0); - rtx op1 = XEXP (x, 1); - int nbits; - if (CONST_INT_P (XEXP (x, 1))) - { - unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); - for (nbits = 0; value != 0; value &= value - 1) - nbits++; - } - else - /* This is arbitrary. */ - nbits = 7; - - /* Compute costs correctly for widening multiplication. */ - if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) - && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 - == GET_MODE_SIZE (mode)) - { - int is_mulwiden = 0; - enum machine_mode inner_mode = GET_MODE (op0); - - if (GET_CODE (op0) == GET_CODE (op1)) - is_mulwiden = 1, op1 = XEXP (op1, 0); - else if (CONST_INT_P (op1)) - { - if (GET_CODE (op0) == SIGN_EXTEND) - is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) - == INTVAL (op1); - else - is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); - } - - if (is_mulwiden) - op0 = XEXP (op0, 0), mode = GET_MODE (op0); - } - - *total = (cost->mult_init[MODE_INDEX (mode)] - + nbits * cost->mult_bit - + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed)); - - return true; - } - - case DIV: - case UDIV: - case MOD: - case UMOD: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - /* ??? SSE cost should be used here. */ - *total = cost->fdiv; - else if (X87_FLOAT_MODE_P (mode)) - *total = cost->fdiv; - else if (FLOAT_MODE_P (mode)) - /* ??? SSE vector cost should be used here. */ - *total = cost->fdiv; - else - *total = cost->divide[MODE_INDEX (mode)]; - return false; - - case PLUS: - if (GET_MODE_CLASS (mode) == MODE_INT - && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) - { - if (GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT - && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) - && CONSTANT_P (XEXP (x, 1))) - { - HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); - if (val == 2 || val == 4 || val == 8) - { - *total = cost->lea; - *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); - *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), - outer_code, speed); - *total += rtx_cost (XEXP (x, 1), outer_code, speed); - return true; - } - } - else if (GET_CODE (XEXP (x, 0)) == MULT - && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { - HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); - if (val == 2 || val == 4 || val == 8) - { - *total = cost->lea; - *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); - *total += rtx_cost (XEXP (x, 1), outer_code, speed); - return true; - } - } - else if (GET_CODE (XEXP (x, 0)) == PLUS) - { - *total = cost->lea; - *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); - *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); - *total += rtx_cost (XEXP (x, 1), outer_code, speed); - return true; - } - } - /* FALLTHRU */ - - case MINUS: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - { - /* ??? SSE cost should be used here. */ - *total = cost->fadd; - return false; - } - else if (X87_FLOAT_MODE_P (mode)) - { - *total = cost->fadd; - return false; - } - else if (FLOAT_MODE_P (mode)) - { - /* ??? SSE vector cost should be used here. */ - *total = cost->fadd; - return false; - } - /* FALLTHRU */ - - case AND: - case IOR: - case XOR: - if (!TARGET_64BIT && mode == DImode) - { - *total = (cost->add * 2 - + (rtx_cost (XEXP (x, 0), outer_code, speed) - << (GET_MODE (XEXP (x, 0)) != DImode)) - + (rtx_cost (XEXP (x, 1), outer_code, speed) - << (GET_MODE (XEXP (x, 1)) != DImode))); - return true; - } - /* FALLTHRU */ - - case NEG: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - { - /* ??? SSE cost should be used here. */ - *total = cost->fchs; - return false; - } - else if (X87_FLOAT_MODE_P (mode)) - { - *total = cost->fchs; - return false; - } - else if (FLOAT_MODE_P (mode)) - { - /* ??? SSE vector cost should be used here. */ - *total = cost->fchs; - return false; - } - /* FALLTHRU */ - - case NOT: - if (!TARGET_64BIT && mode == DImode) - *total = cost->add * 2; - else - *total = cost->add; - return false; - - case COMPARE: - if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT - && XEXP (XEXP (x, 0), 1) == const1_rtx - && CONST_INT_P (XEXP (XEXP (x, 0), 2)) - && XEXP (x, 1) == const0_rtx) - { - /* This kind of construct is implemented using test[bwl]. - Treat it as if we had an AND. */ - *total = (cost->add - + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed) - + rtx_cost (const1_rtx, outer_code, speed)); - return true; - } - return false; - - case FLOAT_EXTEND: - if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) - *total = 0; - return false; - - case ABS: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - /* ??? SSE cost should be used here. */ - *total = cost->fabs; - else if (X87_FLOAT_MODE_P (mode)) - *total = cost->fabs; - else if (FLOAT_MODE_P (mode)) - /* ??? SSE vector cost should be used here. */ - *total = cost->fabs; - return false; - - case SQRT: - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) - /* ??? SSE cost should be used here. */ - *total = cost->fsqrt; - else if (X87_FLOAT_MODE_P (mode)) - *total = cost->fsqrt; - else if (FLOAT_MODE_P (mode)) - /* ??? SSE vector cost should be used here. */ - *total = cost->fsqrt; - return false; - - case UNSPEC: - if (XINT (x, 1) == UNSPEC_TP) - *total = 0; - return false; - - default: - return false; - } -} - -#if TARGET_MACHO - -static int current_machopic_label_num; - -/* Given a symbol name and its associated stub, write out the - definition of the stub. */ - -void -machopic_output_stub (FILE *file, const char *symb, const char *stub) -{ - unsigned int length; - char *binder_name, *symbol_name, lazy_ptr_name[32]; - int label = ++current_machopic_label_num; - - /* For 64-bit we shouldn't get here. */ - gcc_assert (!TARGET_64BIT); - - /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ - symb = (*targetm.strip_name_encoding) (symb); - - length = strlen (stub); - binder_name = XALLOCAVEC (char, length + 32); - GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); - - length = strlen (symb); - symbol_name = XALLOCAVEC (char, length + 32); - GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); - - sprintf (lazy_ptr_name, "L%d$lz", label); - - if (MACHOPIC_PURE) - switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); - else - switch_to_section (darwin_sections[machopic_symbol_stub_section]); - - fprintf (file, "%s:\n", stub); - fprintf (file, "\t.indirect_symbol %s\n", symbol_name); - - if (MACHOPIC_PURE) - { - fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); - fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); - fprintf (file, "\tjmp\t*%%edx\n"); - } - else - fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); - - fprintf (file, "%s:\n", binder_name); - - if (MACHOPIC_PURE) - { - fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); - fprintf (file, "\tpushl\t%%eax\n"); - } - else - fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); - - fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); - - switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); - fprintf (file, "%s:\n", lazy_ptr_name); - fprintf (file, "\t.indirect_symbol %s\n", symbol_name); - fprintf (file, "\t.long %s\n", binder_name); -} - -void -darwin_x86_file_end (void) -{ - darwin_file_end (); - ix86_file_end (); -} -#endif /* TARGET_MACHO */ - -/* Order the registers for register allocator. */ - -void -x86_order_regs_for_local_alloc (void) -{ - int pos = 0; - int i; - - /* First allocate the local general purpose registers. */ - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (GENERAL_REGNO_P (i) && call_used_regs[i]) - reg_alloc_order [pos++] = i; - - /* Global general purpose registers. */ - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (GENERAL_REGNO_P (i) && !call_used_regs[i]) - reg_alloc_order [pos++] = i; - - /* x87 registers come first in case we are doing FP math - using them. */ - if (!TARGET_SSE_MATH) - for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) - reg_alloc_order [pos++] = i; - - /* SSE registers. */ - for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) - reg_alloc_order [pos++] = i; - for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) - reg_alloc_order [pos++] = i; - - /* x87 registers. */ - if (TARGET_SSE_MATH) - for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) - reg_alloc_order [pos++] = i; - - for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) - reg_alloc_order [pos++] = i; - - /* Initialize the rest of array as we do not allocate some registers - at all. */ - while (pos < FIRST_PSEUDO_REGISTER) - reg_alloc_order [pos++] = 0; -} - -/* Handle a "ms_abi" or "sysv" attribute; arguments as in - struct attribute_spec.handler. */ -static tree -ix86_handle_abi_attribute (tree *node, tree name, - tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) -{ - if (TREE_CODE (*node) != FUNCTION_TYPE - && TREE_CODE (*node) != METHOD_TYPE - && TREE_CODE (*node) != FIELD_DECL - && TREE_CODE (*node) != TYPE_DECL) - { - warning (OPT_Wattributes, "%qs attribute only applies to functions", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - return NULL_TREE; - } - if (!TARGET_64BIT) - { - warning (OPT_Wattributes, "%qs attribute only available for 64-bit", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - return NULL_TREE; - } - - /* Can combine regparm with all attributes but fastcall. */ - if (is_attribute_p ("ms_abi", name)) - { - if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) - { - error ("ms_abi and sysv_abi attributes are not compatible"); - } - - return NULL_TREE; - } - else if (is_attribute_p ("sysv_abi", name)) - { - if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) - { - error ("ms_abi and sysv_abi attributes are not compatible"); - } - - return NULL_TREE; - } - - return NULL_TREE; -} - -/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in - struct attribute_spec.handler. */ -static tree -ix86_handle_struct_attribute (tree *node, tree name, - tree args ATTRIBUTE_UNUSED, - int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) -{ - tree *type = NULL; - if (DECL_P (*node)) - { - if (TREE_CODE (*node) == TYPE_DECL) - type = &TREE_TYPE (*node); - } - else - type = node; - - if (!(type && (TREE_CODE (*type) == RECORD_TYPE - || TREE_CODE (*type) == UNION_TYPE))) - { - warning (OPT_Wattributes, "%qs attribute ignored", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - } - - else if ((is_attribute_p ("ms_struct", name) - && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) - || ((is_attribute_p ("gcc_struct", name) - && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) - { - warning (OPT_Wattributes, "%qs incompatible attribute ignored", - IDENTIFIER_POINTER (name)); - *no_add_attrs = true; - } - - return NULL_TREE; -} - -static bool -ix86_ms_bitfield_layout_p (const_tree record_type) -{ - return (TARGET_MS_BITFIELD_LAYOUT && - !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) - || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); -} - -/* Returns an expression indicating where the this parameter is - located on entry to the FUNCTION. */ - -static rtx -x86_this_parameter (tree function) -{ - tree type = TREE_TYPE (function); - bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; - int nregs; - - if (TARGET_64BIT) - { - const int *parm_regs; - - if (ix86_function_type_abi (type) == MS_ABI) - parm_regs = x86_64_ms_abi_int_parameter_registers; - else - parm_regs = x86_64_int_parameter_registers; - return gen_rtx_REG (DImode, parm_regs[aggr]); - } - - nregs = ix86_function_regparm (type, function); - - if (nregs > 0 && !stdarg_p (type)) - { - int regno; - - if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) - regno = aggr ? DX_REG : CX_REG; - else - { - regno = AX_REG; - if (aggr) - { - regno = DX_REG; - if (nregs == 1) - return gen_rtx_MEM (SImode, - plus_constant (stack_pointer_rtx, 4)); - } - } - return gen_rtx_REG (SImode, regno); - } - - return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4)); -} - -/* Determine whether x86_output_mi_thunk can succeed. */ - -static bool -x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, - HOST_WIDE_INT delta ATTRIBUTE_UNUSED, - HOST_WIDE_INT vcall_offset, const_tree function) -{ - /* 64-bit can handle anything. */ - if (TARGET_64BIT) - return true; - - /* For 32-bit, everything's fine if we have one free register. */ - if (ix86_function_regparm (TREE_TYPE (function), function) < 3) - return true; - - /* Need a free register for vcall_offset. */ - if (vcall_offset) - return false; - - /* Need a free register for GOT references. */ - if (flag_pic && !(*targetm.binds_local_p) (function)) - return false; - - /* Otherwise ok. */ - return true; -} - -/* Output the assembler code for a thunk function. THUNK_DECL is the - declaration for the thunk function itself, FUNCTION is the decl for - the target function. DELTA is an immediate constant offset to be - added to THIS. If VCALL_OFFSET is nonzero, the word at - *(*this + vcall_offset) should be added to THIS. */ - -static void -x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, - tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, - HOST_WIDE_INT vcall_offset, tree function) -{ - rtx xops[3]; - rtx this_param = x86_this_parameter (function); - rtx this_reg, tmp; - - /* If VCALL_OFFSET, we'll need THIS in a register. Might as well - pull it in now and let DELTA benefit. */ - if (REG_P (this_param)) - this_reg = this_param; - else if (vcall_offset) - { - /* Put the this parameter into %eax. */ - xops[0] = this_param; - xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG); - output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); - } - else - this_reg = NULL_RTX; - - /* Adjust the this parameter by a fixed constant. */ - if (delta) - { - xops[0] = GEN_INT (delta); - xops[1] = this_reg ? this_reg : this_param; - if (TARGET_64BIT) - { - if (!x86_64_general_operand (xops[0], DImode)) - { - tmp = gen_rtx_REG (DImode, R10_REG); - xops[1] = tmp; - output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); - xops[0] = tmp; - xops[1] = this_param; - } - output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); - } - else - output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); - } - - /* Adjust the this parameter by a value stored in the vtable. */ - if (vcall_offset) - { - if (TARGET_64BIT) - tmp = gen_rtx_REG (DImode, R10_REG); - else - { - int tmp_regno = CX_REG; - if (lookup_attribute ("fastcall", - TYPE_ATTRIBUTES (TREE_TYPE (function)))) - tmp_regno = AX_REG; - tmp = gen_rtx_REG (SImode, tmp_regno); - } - - xops[0] = gen_rtx_MEM (Pmode, this_reg); - xops[1] = tmp; - output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); - - /* Adjust the this parameter. */ - xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); - if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) - { - rtx tmp2 = gen_rtx_REG (DImode, R11_REG); - xops[0] = GEN_INT (vcall_offset); - xops[1] = tmp2; - output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); - xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); - } - xops[1] = this_reg; - output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops); - } - - /* If necessary, drop THIS back to its stack slot. */ - if (this_reg && this_reg != this_param) - { - xops[0] = this_reg; - xops[1] = this_param; - output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); - } - - xops[0] = XEXP (DECL_RTL (function), 0); - if (TARGET_64BIT) - { - if (!flag_pic || (*targetm.binds_local_p) (function)) - output_asm_insn ("jmp\t%P0", xops); - /* All thunks should be in the same object as their target, - and thus binds_local_p should be true. */ - else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI) - gcc_unreachable (); - else - { - tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); - tmp = gen_rtx_CONST (Pmode, tmp); - tmp = gen_rtx_MEM (QImode, tmp); - xops[0] = tmp; - output_asm_insn ("jmp\t%A0", xops); - } - } - else - { - if (!flag_pic || (*targetm.binds_local_p) (function)) - output_asm_insn ("jmp\t%P0", xops); - else -#if TARGET_MACHO - if (TARGET_MACHO) - { - rtx sym_ref = XEXP (DECL_RTL (function), 0); - tmp = (gen_rtx_SYMBOL_REF - (Pmode, - machopic_indirection_name (sym_ref, /*stub_p=*/true))); - tmp = gen_rtx_MEM (QImode, tmp); - xops[0] = tmp; - output_asm_insn ("jmp\t%0", xops); - } - else -#endif /* TARGET_MACHO */ - { - tmp = gen_rtx_REG (SImode, CX_REG); - output_set_got (tmp, NULL_RTX); - - xops[1] = tmp; - output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); - output_asm_insn ("jmp\t{*}%1", xops); - } - } -} - -static void -x86_file_start (void) -{ - default_file_start (); -#if TARGET_MACHO - darwin_file_start (); -#endif - if (X86_FILE_START_VERSION_DIRECTIVE) - fputs ("\t.version\t\"01.01\"\n", asm_out_file); - if (X86_FILE_START_FLTUSED) - fputs ("\t.global\t__fltused\n", asm_out_file); - if (ix86_asm_dialect == ASM_INTEL) - fputs ("\t.intel_syntax noprefix\n", asm_out_file); -} - -int -x86_field_alignment (tree field, int computed) -{ - enum machine_mode mode; - tree type = TREE_TYPE (field); - - if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) - return computed; - mode = TYPE_MODE (strip_array_types (type)); - if (mode == DFmode || mode == DCmode - || GET_MODE_CLASS (mode) == MODE_INT - || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) - return MIN (32, computed); - return computed; -} - -/* Output assembler code to FILE to increment profiler label # LABELNO - for profiling a function entry. */ -void -x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) -{ - if (TARGET_64BIT) - { -#ifndef NO_PROFILE_COUNTERS - fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); -#endif - - if (DEFAULT_ABI == SYSV_ABI && flag_pic) - fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); - else - fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); - } - else if (flag_pic) - { -#ifndef NO_PROFILE_COUNTERS - fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", - LPREFIX, labelno, PROFILE_COUNT_REGISTER); -#endif - fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); - } - else - { -#ifndef NO_PROFILE_COUNTERS - fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, - PROFILE_COUNT_REGISTER); -#endif - fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); - } -} - -/* We don't have exact information about the insn sizes, but we may assume - quite safely that we are informed about all 1 byte insns and memory - address sizes. This is enough to eliminate unnecessary padding in - 99% of cases. */ - -static int -min_insn_size (rtx insn) -{ - int l = 0; - - if (!INSN_P (insn) || !active_insn_p (insn)) - return 0; - - /* Discard alignments we've emit and jump instructions. */ - if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE - && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) - return 0; - if (JUMP_P (insn) - && (GET_CODE (PATTERN (insn)) == ADDR_VEC - || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) - return 0; - - /* Important case - calls are always 5 bytes. - It is common to have many calls in the row. */ - if (CALL_P (insn) - && symbolic_reference_mentioned_p (PATTERN (insn)) - && !SIBLING_CALL_P (insn)) - return 5; - if (get_attr_length (insn) <= 1) - return 1; - - /* For normal instructions we may rely on the sizes of addresses - and the presence of symbol to require 4 bytes of encoding. - This is not the case for jumps where references are PC relative. */ - if (!JUMP_P (insn)) - { - l = get_attr_length_address (insn); - if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) - l = 4; - } - if (l) - return 1+l; - else - return 2; -} - -/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte - window. */ - -static void -ix86_avoid_jump_misspredicts (void) -{ - rtx insn, start = get_insns (); - int nbytes = 0, njumps = 0; - int isjump = 0; - - /* Look for all minimal intervals of instructions containing 4 jumps. - The intervals are bounded by START and INSN. NBYTES is the total - size of instructions in the interval including INSN and not including - START. When the NBYTES is smaller than 16 bytes, it is possible - that the end of START and INSN ends up in the same 16byte page. - - The smallest offset in the page INSN can start is the case where START - ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). - We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). - */ - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - - nbytes += min_insn_size (insn); - if (dump_file) - fprintf(dump_file, "Insn %i estimated to %i bytes\n", - INSN_UID (insn), min_insn_size (insn)); - if ((JUMP_P (insn) - && GET_CODE (PATTERN (insn)) != ADDR_VEC - && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) - || CALL_P (insn)) - njumps++; - else - continue; - - while (njumps > 3) - { - start = NEXT_INSN (start); - if ((JUMP_P (start) - && GET_CODE (PATTERN (start)) != ADDR_VEC - && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) - || CALL_P (start)) - njumps--, isjump = 1; - else - isjump = 0; - nbytes -= min_insn_size (start); - } - gcc_assert (njumps >= 0); - if (dump_file) - fprintf (dump_file, "Interval %i to %i has %i bytes\n", - INSN_UID (start), INSN_UID (insn), nbytes); - - if (njumps == 3 && isjump && nbytes < 16) - { - int padsize = 15 - nbytes + min_insn_size (insn); - - if (dump_file) - fprintf (dump_file, "Padding insn %i by %i bytes!\n", - INSN_UID (insn), padsize); - emit_insn_before (gen_align (GEN_INT (padsize)), insn); - } - } -} - -/* AMD Athlon works faster - when RET is not destination of conditional jump or directly preceded - by other jump instruction. We avoid the penalty by inserting NOP just - before the RET instructions in such cases. */ -static void -ix86_pad_returns (void) -{ - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) - { - basic_block bb = e->src; - rtx ret = BB_END (bb); - rtx prev; - bool replace = false; - - if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN - || optimize_bb_for_size_p (bb)) - continue; - for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) - if (active_insn_p (prev) || LABEL_P (prev)) - break; - if (prev && LABEL_P (prev)) - { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, bb->preds) - if (EDGE_FREQUENCY (e) && e->src->index >= 0 - && !(e->flags & EDGE_FALLTHRU)) - replace = true; - } - if (!replace) - { - prev = prev_active_insn (ret); - if (prev - && ((JUMP_P (prev) && any_condjump_p (prev)) - || CALL_P (prev))) - replace = true; - /* Empty functions get branch mispredict even when the jump destination - is not visible to us. */ - if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) - replace = true; - } - if (replace) - { - emit_insn_before (gen_return_internal_long (), ret); - delete_insn (ret); - } - } -} - -/* Count the minimum number of instructions in BB. Return 4 if the - number of instructions >= 4. */ - -static int -ix86_count_insn_bb (basic_block bb) -{ - rtx insn; - int insn_count = 0; - - /* Count number of instructions in this block. Return 4 if the number - of instructions >= 4. */ - FOR_BB_INSNS (bb, insn) - { - /* Only happen in exit blocks. */ - if (JUMP_P (insn) - && GET_CODE (PATTERN (insn)) == RETURN) - break; - - if (NONDEBUG_INSN_P (insn) - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER) - { - insn_count++; - if (insn_count >= 4) - return insn_count; - } - } - - return insn_count; -} - - -/* Count the minimum number of instructions in code path in BB. - Return 4 if the number of instructions >= 4. */ - -static int -ix86_count_insn (basic_block bb) -{ - edge e; - edge_iterator ei; - int min_prev_count; - - /* Only bother counting instructions along paths with no - more than 2 basic blocks between entry and exit. Given - that BB has an edge to exit, determine if a predecessor - of BB has an edge from entry. If so, compute the number - of instructions in the predecessor block. If there - happen to be multiple such blocks, compute the minimum. */ - min_prev_count = 4; - FOR_EACH_EDGE (e, ei, bb->preds) - { - edge prev_e; - edge_iterator prev_ei; - - if (e->src == ENTRY_BLOCK_PTR) - { - min_prev_count = 0; - break; - } - FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) - { - if (prev_e->src == ENTRY_BLOCK_PTR) - { - int count = ix86_count_insn_bb (e->src); - if (count < min_prev_count) - min_prev_count = count; - break; - } - } - } - - if (min_prev_count < 4) - min_prev_count += ix86_count_insn_bb (bb); - - return min_prev_count; -} - -/* Pad short funtion to 4 instructions. */ - -static void -ix86_pad_short_function (void) -{ - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) - { - rtx ret = BB_END (e->src); - if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN) - { - int insn_count = ix86_count_insn (e->src); - - /* Pad short function. */ - if (insn_count < 4) - { - rtx insn = ret; - - /* Find epilogue. */ - while (insn - && (!NOTE_P (insn) - || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) - insn = PREV_INSN (insn); - - if (!insn) - insn = ret; - - /* Two NOPs are counted as one instruction. */ - insn_count = 2 * (4 - insn_count); - emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); - } - } - } -} - -/* Implement machine specific optimizations. We implement padding of returns - for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ -static void -ix86_reorg (void) -{ - if (optimize && optimize_function_for_speed_p (cfun)) - { - if (TARGET_PAD_SHORT_FUNCTION) - ix86_pad_short_function (); - else if (TARGET_PAD_RETURNS) - ix86_pad_returns (); - if (TARGET_FOUR_JUMP_LIMIT) - ix86_avoid_jump_misspredicts (); - } -} - -/* Return nonzero when QImode register that must be represented via REX prefix - is used. */ -bool -x86_extended_QIreg_mentioned_p (rtx insn) -{ - int i; - extract_insn_cached (insn); - for (i = 0; i < recog_data.n_operands; i++) - if (REG_P (recog_data.operand[i]) - && REGNO (recog_data.operand[i]) > BX_REG) - return true; - return false; -} - -/* Return nonzero when P points to register encoded via REX prefix. - Called via for_each_rtx. */ -static int -extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) -{ - unsigned int regno; - if (!REG_P (*p)) - return 0; - regno = REGNO (*p); - return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); -} - -/* Return true when INSN mentions register that must be encoded using REX - prefix. */ -bool -x86_extended_reg_mentioned_p (rtx insn) -{ - return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn, - extended_reg_mentioned_1, NULL); -} - -/* Generate an unsigned DImode/SImode to FP conversion. This is the same code - optabs would emit if we didn't have TFmode patterns. */ - -void -x86_emit_floatuns (rtx operands[2]) -{ - rtx neglab, donelab, i0, i1, f0, in, out; - enum machine_mode mode, inmode; - - inmode = GET_MODE (operands[1]); - gcc_assert (inmode == SImode || inmode == DImode); - - out = operands[0]; - in = force_reg (inmode, operands[1]); - mode = GET_MODE (out); - neglab = gen_label_rtx (); - donelab = gen_label_rtx (); - f0 = gen_reg_rtx (mode); - - emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); - - expand_float (out, in, 0); - - emit_jump_insn (gen_jump (donelab)); - emit_barrier (); - - emit_label (neglab); - - i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, - 1, OPTAB_DIRECT); - i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, - 1, OPTAB_DIRECT); - i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); - - expand_float (f0, i0, 0); - - emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); - - emit_label (donelab); -} - -/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - with all elements equal to VAR. Return true if successful. */ - -static bool -ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, - rtx target, rtx val) -{ - enum machine_mode hmode, smode, wsmode, wvmode; - rtx x; - - switch (mode) - { - case V2SImode: - case V2SFmode: - if (!mmx_ok) - return false; - /* FALLTHRU */ - - case V2DFmode: - case V2DImode: - case V4SFmode: - case V4SImode: - val = force_reg (GET_MODE_INNER (mode), val); - x = gen_rtx_VEC_DUPLICATE (mode, val); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); - return true; - - case V4HImode: - if (!mmx_ok) - return false; - if (TARGET_SSE || TARGET_3DNOW_A) - { - val = gen_lowpart (SImode, val); - x = gen_rtx_TRUNCATE (HImode, val); - x = gen_rtx_VEC_DUPLICATE (mode, x); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); - return true; - } - else - { - smode = HImode; - wsmode = SImode; - wvmode = V2SImode; - goto widen; - } - - case V8QImode: - if (!mmx_ok) - return false; - smode = QImode; - wsmode = HImode; - wvmode = V4HImode; - goto widen; - case V8HImode: - if (TARGET_SSE2) - { - rtx tmp1, tmp2; - /* Extend HImode to SImode using a paradoxical SUBREG. */ - tmp1 = gen_reg_rtx (SImode); - emit_move_insn (tmp1, gen_lowpart (SImode, val)); - /* Insert the SImode value as low element of V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - tmp1 = gen_rtx_VEC_MERGE (V4SImode, - gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), - CONST0_RTX (V4SImode), - const1_rtx); - emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); - /* Cast the V4SImode vector back to a V8HImode vector. */ - tmp1 = gen_reg_rtx (V8HImode); - emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); - /* Duplicate the low short through the whole low SImode word. */ - emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); - /* Cast the V8HImode vector back to a V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); - /* Replicate the low element of the V4SImode vector. */ - emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); - /* Cast the V2SImode back to V8HImode, and store in target. */ - emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); - return true; - } - smode = HImode; - wsmode = SImode; - wvmode = V4SImode; - goto widen; - case V16QImode: - if (TARGET_SSE2) - { - rtx tmp1, tmp2; - /* Extend QImode to SImode using a paradoxical SUBREG. */ - tmp1 = gen_reg_rtx (SImode); - emit_move_insn (tmp1, gen_lowpart (SImode, val)); - /* Insert the SImode value as low element of V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - tmp1 = gen_rtx_VEC_MERGE (V4SImode, - gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), - CONST0_RTX (V4SImode), - const1_rtx); - emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); - /* Cast the V4SImode vector back to a V16QImode vector. */ - tmp1 = gen_reg_rtx (V16QImode); - emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); - /* Duplicate the low byte through the whole low SImode word. */ - emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); - emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); - /* Cast the V16QImode vector back to a V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); - /* Replicate the low element of the V4SImode vector. */ - emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); - /* Cast the V2SImode back to V16QImode, and store in target. */ - emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); - return true; - } - smode = QImode; - wsmode = HImode; - wvmode = V8HImode; - goto widen; - widen: - /* Replicate the value once into the next wider mode and recurse. */ - val = convert_modes (wsmode, smode, val, true); - x = expand_simple_binop (wsmode, ASHIFT, val, - GEN_INT (GET_MODE_BITSIZE (smode)), - NULL_RTX, 1, OPTAB_LIB_WIDEN); - val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); - - x = gen_reg_rtx (wvmode); - if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) - gcc_unreachable (); - emit_move_insn (target, gen_lowpart (mode, x)); - return true; - - case V4DFmode: - hmode = V2DFmode; - goto half; - case V4DImode: - hmode = V2DImode; - goto half; - case V8SFmode: - hmode = V4SFmode; - goto half; - case V8SImode: - hmode = V4SImode; - goto half; - case V16HImode: - hmode = V8HImode; - goto half; - case V32QImode: - hmode = V16QImode; - goto half; -half: - { - rtx tmp = gen_reg_rtx (hmode); - ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val); - emit_insn (gen_rtx_SET (VOIDmode, target, - gen_rtx_VEC_CONCAT (mode, tmp, tmp))); - } - return true; - - default: - return false; - } -} - -/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - whose ONE_VAR element is VAR, and other elements are zero. Return true - if successful. */ - -static bool -ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, - rtx target, rtx var, int one_var) -{ - enum machine_mode vsimode; - rtx new_target; - rtx x, tmp; - bool use_vector_set = false; - - switch (mode) - { - case V2DImode: - /* For SSE4.1, we normally use vector set. But if the second - element is zero and inter-unit moves are OK, we use movq - instead. */ - use_vector_set = (TARGET_64BIT - && TARGET_SSE4_1 - && !(TARGET_INTER_UNIT_MOVES - && one_var == 0)); - break; - case V16QImode: - case V4SImode: - case V4SFmode: - use_vector_set = TARGET_SSE4_1; - break; - case V8HImode: - use_vector_set = TARGET_SSE2; - break; - case V4HImode: - use_vector_set = TARGET_SSE || TARGET_3DNOW_A; - break; - case V32QImode: - case V16HImode: - case V8SImode: - case V8SFmode: - case V4DFmode: - use_vector_set = TARGET_AVX; - break; - case V4DImode: - /* Use ix86_expand_vector_set in 64bit mode only. */ - use_vector_set = TARGET_AVX && TARGET_64BIT; - break; - default: - break; - } - - if (use_vector_set) - { - emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode))); - var = force_reg (GET_MODE_INNER (mode), var); - ix86_expand_vector_set (mmx_ok, target, var, one_var); - return true; - } - - switch (mode) - { - case V2SFmode: - case V2SImode: - if (!mmx_ok) - return false; - /* FALLTHRU */ - - case V2DFmode: - case V2DImode: - if (one_var != 0) - return false; - var = force_reg (GET_MODE_INNER (mode), var); - x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); - return true; - - case V4SFmode: - case V4SImode: - if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) - new_target = gen_reg_rtx (mode); - else - new_target = target; - var = force_reg (GET_MODE_INNER (mode), var); - x = gen_rtx_VEC_DUPLICATE (mode, var); - x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); - emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); - if (one_var != 0) - { - /* We need to shuffle the value to the correct position, so - create a new pseudo to store the intermediate result. */ - - /* With SSE2, we can use the integer shuffle insns. */ - if (mode != V4SFmode && TARGET_SSE2) - { - emit_insn (gen_sse2_pshufd_1 (new_target, new_target, - GEN_INT (1), - GEN_INT (one_var == 1 ? 0 : 1), - GEN_INT (one_var == 2 ? 0 : 1), - GEN_INT (one_var == 3 ? 0 : 1))); - if (target != new_target) - emit_move_insn (target, new_target); - return true; - } - - /* Otherwise convert the intermediate result to V4SFmode and - use the SSE1 shuffle instructions. */ - if (mode != V4SFmode) - { - tmp = gen_reg_rtx (V4SFmode); - emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); - } - else - tmp = new_target; - - emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, - GEN_INT (1), - GEN_INT (one_var == 1 ? 0 : 1), - GEN_INT (one_var == 2 ? 0+4 : 1+4), - GEN_INT (one_var == 3 ? 0+4 : 1+4))); - - if (mode != V4SFmode) - emit_move_insn (target, gen_lowpart (V4SImode, tmp)); - else if (tmp != target) - emit_move_insn (target, tmp); - } - else if (target != new_target) - emit_move_insn (target, new_target); - return true; - - case V8HImode: - case V16QImode: - vsimode = V4SImode; - goto widen; - case V4HImode: - case V8QImode: - if (!mmx_ok) - return false; - vsimode = V2SImode; - goto widen; - widen: - if (one_var != 0) - return false; - - /* Zero extend the variable element to SImode and recurse. */ - var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); - - x = gen_reg_rtx (vsimode); - if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, - var, one_var)) - gcc_unreachable (); - - emit_move_insn (target, gen_lowpart (mode, x)); - return true; - - default: - return false; - } -} - -/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - consisting of the values in VALS. It is known that all elements - except ONE_VAR are constants. Return true if successful. */ - -static bool -ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, - rtx target, rtx vals, int one_var) -{ - rtx var = XVECEXP (vals, 0, one_var); - enum machine_mode wmode; - rtx const_vec, x; - - const_vec = copy_rtx (vals); - XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); - const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); - - switch (mode) - { - case V2DFmode: - case V2DImode: - case V2SFmode: - case V2SImode: - /* For the two element vectors, it's just as easy to use - the general case. */ - return false; - - case V4DImode: - /* Use ix86_expand_vector_set in 64bit mode only. */ - if (!TARGET_64BIT) - return false; - case V4DFmode: - case V8SFmode: - case V8SImode: - case V16HImode: - case V32QImode: - case V4SFmode: - case V4SImode: - case V8HImode: - case V4HImode: - break; - - case V16QImode: - if (TARGET_SSE4_1) - break; - wmode = V8HImode; - goto widen; - case V8QImode: - wmode = V4HImode; - goto widen; - widen: - /* There's no way to set one QImode entry easily. Combine - the variable value with its adjacent constant value, and - promote to an HImode set. */ - x = XVECEXP (vals, 0, one_var ^ 1); - if (one_var & 1) - { - var = convert_modes (HImode, QImode, var, true); - var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), - NULL_RTX, 1, OPTAB_LIB_WIDEN); - x = GEN_INT (INTVAL (x) & 0xff); - } - else - { - var = convert_modes (HImode, QImode, var, true); - x = gen_int_mode (INTVAL (x) << 8, HImode); - } - if (x != const0_rtx) - var = expand_simple_binop (HImode, IOR, var, x, var, - 1, OPTAB_LIB_WIDEN); - - x = gen_reg_rtx (wmode); - emit_move_insn (x, gen_lowpart (wmode, const_vec)); - ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); - - emit_move_insn (target, gen_lowpart (mode, x)); - return true; - - default: - return false; - } - - emit_move_insn (target, const_vec); - ix86_expand_vector_set (mmx_ok, target, var, one_var); - return true; -} - -/* A subroutine of ix86_expand_vector_init_general. Use vector - concatenate to handle the most general case: all values variable, - and none identical. */ - -static void -ix86_expand_vector_init_concat (enum machine_mode mode, - rtx target, rtx *ops, int n) -{ - enum machine_mode cmode, hmode = VOIDmode; - rtx first[8], second[4]; - rtvec v; - int i, j; - - switch (n) - { - case 2: - switch (mode) - { - case V8SImode: - cmode = V4SImode; - break; - case V8SFmode: - cmode = V4SFmode; - break; - case V4DImode: - cmode = V2DImode; - break; - case V4DFmode: - cmode = V2DFmode; - break; - case V4SImode: - cmode = V2SImode; - break; - case V4SFmode: - cmode = V2SFmode; - break; - case V2DImode: - cmode = DImode; - break; - case V2SImode: - cmode = SImode; - break; - case V2DFmode: - cmode = DFmode; - break; - case V2SFmode: - cmode = SFmode; - break; - default: - gcc_unreachable (); - } - - if (!register_operand (ops[1], cmode)) - ops[1] = force_reg (cmode, ops[1]); - if (!register_operand (ops[0], cmode)) - ops[0] = force_reg (cmode, ops[0]); - emit_insn (gen_rtx_SET (VOIDmode, target, - gen_rtx_VEC_CONCAT (mode, ops[0], - ops[1]))); - break; - - case 4: - switch (mode) - { - case V4DImode: - cmode = V2DImode; - break; - case V4DFmode: - cmode = V2DFmode; - break; - case V4SImode: - cmode = V2SImode; - break; - case V4SFmode: - cmode = V2SFmode; - break; - default: - gcc_unreachable (); - } - goto half; - - case 8: - switch (mode) - { - case V8SImode: - cmode = V2SImode; - hmode = V4SImode; - break; - case V8SFmode: - cmode = V2SFmode; - hmode = V4SFmode; - break; - default: - gcc_unreachable (); - } - goto half; - -half: - /* FIXME: We process inputs backward to help RA. PR 36222. */ - i = n - 1; - j = (n >> 1) - 1; - for (; i > 0; i -= 2, j--) - { - first[j] = gen_reg_rtx (cmode); - v = gen_rtvec (2, ops[i - 1], ops[i]); - ix86_expand_vector_init (false, first[j], - gen_rtx_PARALLEL (cmode, v)); - } - - n >>= 1; - if (n > 2) - { - gcc_assert (hmode != VOIDmode); - for (i = j = 0; i < n; i += 2, j++) - { - second[j] = gen_reg_rtx (hmode); - ix86_expand_vector_init_concat (hmode, second [j], - &first [i], 2); - } - n >>= 1; - ix86_expand_vector_init_concat (mode, target, second, n); - } - else - ix86_expand_vector_init_concat (mode, target, first, n); - break; - - default: - gcc_unreachable (); - } -} - -/* A subroutine of ix86_expand_vector_init_general. Use vector - interleave to handle the most general case: all values variable, - and none identical. */ - -static void -ix86_expand_vector_init_interleave (enum machine_mode mode, - rtx target, rtx *ops, int n) -{ - enum machine_mode first_imode, second_imode, third_imode, inner_mode; - int i, j; - rtx op0, op1; - rtx (*gen_load_even) (rtx, rtx, rtx); - rtx (*gen_interleave_first_low) (rtx, rtx, rtx); - rtx (*gen_interleave_second_low) (rtx, rtx, rtx); - - switch (mode) - { - case V8HImode: - gen_load_even = gen_vec_setv8hi; - gen_interleave_first_low = gen_vec_interleave_lowv4si; - gen_interleave_second_low = gen_vec_interleave_lowv2di; - inner_mode = HImode; - first_imode = V4SImode; - second_imode = V2DImode; - third_imode = VOIDmode; - break; - case V16QImode: - gen_load_even = gen_vec_setv16qi; - gen_interleave_first_low = gen_vec_interleave_lowv8hi; - gen_interleave_second_low = gen_vec_interleave_lowv4si; - inner_mode = QImode; - first_imode = V8HImode; - second_imode = V4SImode; - third_imode = V2DImode; - break; - default: - gcc_unreachable (); - } - - for (i = 0; i < n; i++) - { - /* Extend the odd elment to SImode using a paradoxical SUBREG. */ - op0 = gen_reg_rtx (SImode); - emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); - - /* Insert the SImode value as low element of V4SImode vector. */ - op1 = gen_reg_rtx (V4SImode); - op0 = gen_rtx_VEC_MERGE (V4SImode, - gen_rtx_VEC_DUPLICATE (V4SImode, - op0), - CONST0_RTX (V4SImode), - const1_rtx); - emit_insn (gen_rtx_SET (VOIDmode, op1, op0)); - - /* Cast the V4SImode vector back to a vector in orignal mode. */ - op0 = gen_reg_rtx (mode); - emit_move_insn (op0, gen_lowpart (mode, op1)); - - /* Load even elements into the second positon. */ - emit_insn ((*gen_load_even) (op0, - force_reg (inner_mode, - ops [i + i + 1]), - const1_rtx)); - - /* Cast vector to FIRST_IMODE vector. */ - ops[i] = gen_reg_rtx (first_imode); - emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); - } - - /* Interleave low FIRST_IMODE vectors. */ - for (i = j = 0; i < n; i += 2, j++) - { - op0 = gen_reg_rtx (first_imode); - emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1])); - - /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ - ops[j] = gen_reg_rtx (second_imode); - emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); - } - - /* Interleave low SECOND_IMODE vectors. */ - switch (second_imode) - { - case V4SImode: - for (i = j = 0; i < n / 2; i += 2, j++) - { - op0 = gen_reg_rtx (second_imode); - emit_insn ((*gen_interleave_second_low) (op0, ops[i], - ops[i + 1])); - - /* Cast the SECOND_IMODE vector to the THIRD_IMODE - vector. */ - ops[j] = gen_reg_rtx (third_imode); - emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); - } - second_imode = V2DImode; - gen_interleave_second_low = gen_vec_interleave_lowv2di; - /* FALLTHRU */ - - case V2DImode: - op0 = gen_reg_rtx (second_imode); - emit_insn ((*gen_interleave_second_low) (op0, ops[0], - ops[1])); - - /* Cast the SECOND_IMODE vector back to a vector on original - mode. */ - emit_insn (gen_rtx_SET (VOIDmode, target, - gen_lowpart (mode, op0))); - break; - - default: - gcc_unreachable (); - } -} - -/* A subroutine of ix86_expand_vector_init. Handle the most general case: - all values variable, and none identical. */ - -static void -ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, - rtx target, rtx vals) -{ - rtx ops[32], op0, op1; - enum machine_mode half_mode = VOIDmode; - int n, i; - - switch (mode) - { - case V2SFmode: - case V2SImode: - if (!mmx_ok && !TARGET_SSE) - break; - /* FALLTHRU */ - - case V8SFmode: - case V8SImode: - case V4DFmode: - case V4DImode: - case V4SFmode: - case V4SImode: - case V2DFmode: - case V2DImode: - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - ix86_expand_vector_init_concat (mode, target, ops, n); - return; - - case V32QImode: - half_mode = V16QImode; - goto half; - - case V16HImode: - half_mode = V8HImode; - goto half; - -half: - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - op0 = gen_reg_rtx (half_mode); - op1 = gen_reg_rtx (half_mode); - ix86_expand_vector_init_interleave (half_mode, op0, ops, - n >> 2); - ix86_expand_vector_init_interleave (half_mode, op1, - &ops [n >> 1], n >> 2); - emit_insn (gen_rtx_SET (VOIDmode, target, - gen_rtx_VEC_CONCAT (mode, op0, op1))); - return; - - case V16QImode: - if (!TARGET_SSE4_1) - break; - /* FALLTHRU */ - - case V8HImode: - if (!TARGET_SSE2) - break; - - /* Don't use ix86_expand_vector_init_interleave if we can't - move from GPR to SSE register directly. */ - if (!TARGET_INTER_UNIT_MOVES) - break; - - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); - return; - - case V4HImode: - case V8QImode: - break; - - default: - gcc_unreachable (); - } - - { - int i, j, n_elts, n_words, n_elt_per_word; - enum machine_mode inner_mode; - rtx words[4], shift; - - inner_mode = GET_MODE_INNER (mode); - n_elts = GET_MODE_NUNITS (mode); - n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; - n_elt_per_word = n_elts / n_words; - shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); - - for (i = 0; i < n_words; ++i) - { - rtx word = NULL_RTX; - - for (j = 0; j < n_elt_per_word; ++j) - { - rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); - elt = convert_modes (word_mode, inner_mode, elt, true); - - if (j == 0) - word = elt; - else - { - word = expand_simple_binop (word_mode, ASHIFT, word, shift, - word, 1, OPTAB_LIB_WIDEN); - word = expand_simple_binop (word_mode, IOR, word, elt, - word, 1, OPTAB_LIB_WIDEN); - } - } - - words[i] = word; - } - - if (n_words == 1) - emit_move_insn (target, gen_lowpart (mode, words[0])); - else if (n_words == 2) - { - rtx tmp = gen_reg_rtx (mode); - emit_clobber (tmp); - emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); - emit_move_insn (gen_highpart (word_mode, tmp), words[1]); - emit_move_insn (target, tmp); - } - else if (n_words == 4) - { - rtx tmp = gen_reg_rtx (V4SImode); - gcc_assert (word_mode == SImode); - vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); - ix86_expand_vector_init_general (false, V4SImode, tmp, vals); - emit_move_insn (target, gen_lowpart (mode, tmp)); - } - else - gcc_unreachable (); - } -} - -/* Initialize vector TARGET via VALS. Suppress the use of MMX - instructions unless MMX_OK is true. */ - -void -ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) -{ - enum machine_mode mode = GET_MODE (target); - enum machine_mode inner_mode = GET_MODE_INNER (mode); - int n_elts = GET_MODE_NUNITS (mode); - int n_var = 0, one_var = -1; - bool all_same = true, all_const_zero = true; - int i; - rtx x; - - for (i = 0; i < n_elts; ++i) - { - x = XVECEXP (vals, 0, i); - if (!(CONST_INT_P (x) - || GET_CODE (x) == CONST_DOUBLE - || GET_CODE (x) == CONST_FIXED)) - n_var++, one_var = i; - else if (x != CONST0_RTX (inner_mode)) - all_const_zero = false; - if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) - all_same = false; - } - - /* Constants are best loaded from the constant pool. */ - if (n_var == 0) - { - emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); - return; - } - - /* If all values are identical, broadcast the value. */ - if (all_same - && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, - XVECEXP (vals, 0, 0))) - return; - - /* Values where only one field is non-constant are best loaded from - the pool and overwritten via move later. */ - if (n_var == 1) - { - if (all_const_zero - && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, - XVECEXP (vals, 0, one_var), - one_var)) - return; - - if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) - return; - } - - ix86_expand_vector_init_general (mmx_ok, mode, target, vals); -} - -void -ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) -{ - enum machine_mode mode = GET_MODE (target); - enum machine_mode inner_mode = GET_MODE_INNER (mode); - enum machine_mode half_mode; - bool use_vec_merge = false; - rtx tmp; - static rtx (*gen_extract[6][2]) (rtx, rtx) - = { - { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, - { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, - { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, - { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, - { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, - { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } - }; - static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) - = { - { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, - { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, - { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, - { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, - { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, - { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } - }; - int i, j, n; - - switch (mode) - { - case V2SFmode: - case V2SImode: - if (mmx_ok) - { - tmp = gen_reg_rtx (GET_MODE_INNER (mode)); - ix86_expand_vector_extract (true, tmp, target, 1 - elt); - if (elt == 0) - tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); - else - tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); - emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); - return; - } - break; - - case V2DImode: - use_vec_merge = TARGET_SSE4_1; - if (use_vec_merge) - break; - - case V2DFmode: - { - rtx op0, op1; - - /* For the two element vectors, we implement a VEC_CONCAT with - the extraction of the other element. */ - - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); - tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); - - if (elt == 0) - op0 = val, op1 = tmp; - else - op0 = tmp, op1 = val; - - tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); - emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); - } - return; - - case V4SFmode: - use_vec_merge = TARGET_SSE4_1; - if (use_vec_merge) - break; - - switch (elt) - { - case 0: - use_vec_merge = true; - break; - - case 1: - /* tmp = target = A B C D */ - tmp = copy_to_reg (target); - /* target = A A B B */ - emit_insn (gen_sse_unpcklps (target, target, target)); - /* target = X A B B */ - ix86_expand_vector_set (false, target, val, 0); - /* target = A X C D */ - emit_insn (gen_sse_shufps_v4sf (target, target, tmp, - GEN_INT (1), GEN_INT (0), - GEN_INT (2+4), GEN_INT (3+4))); - return; - - case 2: - /* tmp = target = A B C D */ - tmp = copy_to_reg (target); - /* tmp = X B C D */ - ix86_expand_vector_set (false, tmp, val, 0); - /* target = A B X D */ - emit_insn (gen_sse_shufps_v4sf (target, target, tmp, - GEN_INT (0), GEN_INT (1), - GEN_INT (0+4), GEN_INT (3+4))); - return; - - case 3: - /* tmp = target = A B C D */ - tmp = copy_to_reg (target); - /* tmp = X B C D */ - ix86_expand_vector_set (false, tmp, val, 0); - /* target = A B X D */ - emit_insn (gen_sse_shufps_v4sf (target, target, tmp, - GEN_INT (0), GEN_INT (1), - GEN_INT (2+4), GEN_INT (0+4))); - return; - - default: - gcc_unreachable (); - } - break; - - case V4SImode: - use_vec_merge = TARGET_SSE4_1; - if (use_vec_merge) - break; - - /* Element 0 handled by vec_merge below. */ - if (elt == 0) - { - use_vec_merge = true; - break; - } - - if (TARGET_SSE2) - { - /* With SSE2, use integer shuffles to swap element 0 and ELT, - store into element 0, then shuffle them back. */ - - rtx order[4]; - - order[0] = GEN_INT (elt); - order[1] = const1_rtx; - order[2] = const2_rtx; - order[3] = GEN_INT (3); - order[elt] = const0_rtx; - - emit_insn (gen_sse2_pshufd_1 (target, target, order[0], - order[1], order[2], order[3])); - - ix86_expand_vector_set (false, target, val, 0); - - emit_insn (gen_sse2_pshufd_1 (target, target, order[0], - order[1], order[2], order[3])); - } - else - { - /* For SSE1, we have to reuse the V4SF code. */ - ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), - gen_lowpart (SFmode, val), elt); - } - return; - - case V8HImode: - use_vec_merge = TARGET_SSE2; - break; - case V4HImode: - use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); - break; - - case V16QImode: - use_vec_merge = TARGET_SSE4_1; - break; - - case V8QImode: - break; - - case V32QImode: - half_mode = V16QImode; - j = 0; - n = 16; - goto half; - - case V16HImode: - half_mode = V8HImode; - j = 1; - n = 8; - goto half; - - case V8SImode: - half_mode = V4SImode; - j = 2; - n = 4; - goto half; - - case V4DImode: - half_mode = V2DImode; - j = 3; - n = 2; - goto half; - - case V8SFmode: - half_mode = V4SFmode; - j = 4; - n = 4; - goto half; - - case V4DFmode: - half_mode = V2DFmode; - j = 5; - n = 2; - goto half; - -half: - /* Compute offset. */ - i = elt / n; - elt %= n; - - gcc_assert (i <= 1); - - /* Extract the half. */ - tmp = gen_reg_rtx (half_mode); - emit_insn ((*gen_extract[j][i]) (tmp, target)); - - /* Put val in tmp at elt. */ - ix86_expand_vector_set (false, tmp, val, elt); - - /* Put it back. */ - emit_insn ((*gen_insert[j][i]) (target, target, tmp)); - return; - - default: - break; - } - - if (use_vec_merge) - { - tmp = gen_rtx_VEC_DUPLICATE (mode, val); - tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); - emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); - } - else - { - rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); - - emit_move_insn (mem, target); - - tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); - emit_move_insn (tmp, val); - - emit_move_insn (target, mem); - } -} - -void -ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) -{ - enum machine_mode mode = GET_MODE (vec); - enum machine_mode inner_mode = GET_MODE_INNER (mode); - bool use_vec_extr = false; - rtx tmp; - - switch (mode) - { - case V2SImode: - case V2SFmode: - if (!mmx_ok) - break; - /* FALLTHRU */ - - case V2DFmode: - case V2DImode: - use_vec_extr = true; - break; - - case V4SFmode: - use_vec_extr = TARGET_SSE4_1; - if (use_vec_extr) - break; - - switch (elt) - { - case 0: - tmp = vec; - break; - - case 1: - case 3: - tmp = gen_reg_rtx (mode); - emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, - GEN_INT (elt), GEN_INT (elt), - GEN_INT (elt+4), GEN_INT (elt+4))); - break; - - case 2: - tmp = gen_reg_rtx (mode); - emit_insn (gen_sse_unpckhps (tmp, vec, vec)); - break; - - default: - gcc_unreachable (); - } - vec = tmp; - use_vec_extr = true; - elt = 0; - break; - - case V4SImode: - use_vec_extr = TARGET_SSE4_1; - if (use_vec_extr) - break; - - if (TARGET_SSE2) - { - switch (elt) - { - case 0: - tmp = vec; - break; - - case 1: - case 3: - tmp = gen_reg_rtx (mode); - emit_insn (gen_sse2_pshufd_1 (tmp, vec, - GEN_INT (elt), GEN_INT (elt), - GEN_INT (elt), GEN_INT (elt))); - break; - - case 2: - tmp = gen_reg_rtx (mode); - emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); - break; - - default: - gcc_unreachable (); - } - vec = tmp; - use_vec_extr = true; - elt = 0; - } - else - { - /* For SSE1, we have to reuse the V4SF code. */ - ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), - gen_lowpart (V4SFmode, vec), elt); - return; - } - break; - - case V8HImode: - use_vec_extr = TARGET_SSE2; - break; - case V4HImode: - use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); - break; - - case V16QImode: - use_vec_extr = TARGET_SSE4_1; - break; - - case V8QImode: - /* ??? Could extract the appropriate HImode element and shift. */ - default: - break; - } - - if (use_vec_extr) - { - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); - tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); - - /* Let the rtl optimizers know about the zero extension performed. */ - if (inner_mode == QImode || inner_mode == HImode) - { - tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); - target = gen_lowpart (SImode, target); - } - - emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); - } - else - { - rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); - - emit_move_insn (mem, vec); - - tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); - emit_move_insn (target, tmp); - } -} - -/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary - pattern to reduce; DEST is the destination; IN is the input vector. */ - -void -ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) -{ - rtx tmp1, tmp2, tmp3; - - tmp1 = gen_reg_rtx (V4SFmode); - tmp2 = gen_reg_rtx (V4SFmode); - tmp3 = gen_reg_rtx (V4SFmode); - - emit_insn (gen_sse_movhlps (tmp1, in, in)); - emit_insn (fn (tmp2, tmp1, in)); - - emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, - GEN_INT (1), GEN_INT (1), - GEN_INT (1+4), GEN_INT (1+4))); - emit_insn (fn (dest, tmp2, tmp3)); -} - -/* Target hook for scalar_mode_supported_p. */ -static bool -ix86_scalar_mode_supported_p (enum machine_mode mode) -{ - if (DECIMAL_FLOAT_MODE_P (mode)) - return default_decimal_float_supported_p (); - else if (mode == TFmode) - return true; - else - return default_scalar_mode_supported_p (mode); -} - -/* Implements target hook vector_mode_supported_p. */ -static bool -ix86_vector_mode_supported_p (enum machine_mode mode) -{ - if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) - return true; - if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) - return true; - if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) - return true; - if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) - return true; - if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) - return true; - return false; -} - -/* Target hook for c_mode_for_suffix. */ -static enum machine_mode -ix86_c_mode_for_suffix (char suffix) -{ - if (suffix == 'q') - return TFmode; - if (suffix == 'w') - return XFmode; - - return VOIDmode; -} - -/* Worker function for TARGET_MD_ASM_CLOBBERS. - - We do this in the new i386 backend to maintain source compatibility - with the old cc0-based compiler. */ - -static tree -ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, - tree inputs ATTRIBUTE_UNUSED, - tree clobbers) -{ - clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), - clobbers); - clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"), - clobbers); - return clobbers; -} - -/* Implements target vector targetm.asm.encode_section_info. This - is not used by netware. */ - -static void ATTRIBUTE_UNUSED -ix86_encode_section_info (tree decl, rtx rtl, int first) -{ - default_encode_section_info (decl, rtl, first); - - if (TREE_CODE (decl) == VAR_DECL - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) - && ix86_in_large_data_p (decl)) - SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; -} - -/* Worker function for REVERSE_CONDITION. */ - -enum rtx_code -ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) -{ - return (mode != CCFPmode && mode != CCFPUmode - ? reverse_condition (code) - : reverse_condition_maybe_unordered (code)); -} - -/* Output code to perform an x87 FP register move, from OPERANDS[1] - to OPERANDS[0]. */ - -const char * -output_387_reg_move (rtx insn, rtx *operands) -{ - if (REG_P (operands[0])) - { - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - { - if (REGNO (operands[0]) == FIRST_STACK_REG) - return output_387_ffreep (operands, 0); - return "fstp\t%y0"; - } - if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - return "fst\t%y0"; - } - else if (MEM_P (operands[0])) - { - gcc_assert (REG_P (operands[1])); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - { - /* There is no non-popping store to memory for XFmode. - So if we need one, follow the store with a load. */ - if (GET_MODE (operands[0]) == XFmode) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fst%z0\t%y0"; - } - } - else - gcc_unreachable(); -} - -/* Output code to perform a conditional jump to LABEL, if C2 flag in - FP status register is set. */ - -void -ix86_emit_fp_unordered_jump (rtx label) -{ - rtx reg = gen_reg_rtx (HImode); - rtx temp; - - emit_insn (gen_x86_fnstsw_1 (reg)); - - if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) - { - emit_insn (gen_x86_sahf_1 (reg)); - - temp = gen_rtx_REG (CCmode, FLAGS_REG); - temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); - } - else - { - emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); - - temp = gen_rtx_REG (CCNOmode, FLAGS_REG); - temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); - } - - temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx); - temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); - - emit_jump_insn (temp); - predict_jump (REG_BR_PROB_BASE * 10 / 100); -} - -/* Output code to perform a log1p XFmode calculation. */ - -void ix86_emit_i387_log1p (rtx op0, rtx op1) -{ - rtx label1 = gen_label_rtx (); - rtx label2 = gen_label_rtx (); - - rtx tmp = gen_reg_rtx (XFmode); - rtx tmp2 = gen_reg_rtx (XFmode); - - emit_insn (gen_absxf2 (tmp, op1)); - emit_insn (gen_cmpxf (tmp, - CONST_DOUBLE_FROM_REAL_VALUE ( - REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), - XFmode))); - emit_jump_insn (gen_bge (label1)); - - emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ - emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2)); - emit_jump (label2); - - emit_label (label1); - emit_move_insn (tmp, CONST1_RTX (XFmode)); - emit_insn (gen_addxf3 (tmp, op1, tmp)); - emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ - emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2)); - - emit_label (label2); -} - -/* Output code to perform a Newton-Rhapson approximation of a single precision - floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */ - -void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode) -{ - rtx x0, x1, e0, e1, two; - - x0 = gen_reg_rtx (mode); - e0 = gen_reg_rtx (mode); - e1 = gen_reg_rtx (mode); - x1 = gen_reg_rtx (mode); - - two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode); - - if (VECTOR_MODE_P (mode)) - two = ix86_build_const_vector (SFmode, true, two); - - two = force_reg (mode, two); - - /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */ - - /* x0 = rcp(b) estimate */ - emit_insn (gen_rtx_SET (VOIDmode, x0, - gen_rtx_UNSPEC (mode, gen_rtvec (1, b), - UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (VOIDmode, e0, - gen_rtx_MULT (mode, x0, b))); - /* e1 = 2. - e0 */ - emit_insn (gen_rtx_SET (VOIDmode, e1, - gen_rtx_MINUS (mode, two, e0))); - /* x1 = x0 * e1 */ - emit_insn (gen_rtx_SET (VOIDmode, x1, - gen_rtx_MULT (mode, x0, e1))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (VOIDmode, res, - gen_rtx_MULT (mode, a, x1))); -} - -/* Output code to perform a Newton-Rhapson approximation of a - single precision floating point [reciprocal] square root. */ - -void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, - bool recip) -{ - rtx x0, e0, e1, e2, e3, mthree, mhalf; - REAL_VALUE_TYPE r; - - x0 = gen_reg_rtx (mode); - e0 = gen_reg_rtx (mode); - e1 = gen_reg_rtx (mode); - e2 = gen_reg_rtx (mode); - e3 = gen_reg_rtx (mode); - - real_from_integer (&r, VOIDmode, -3, -1, 0); - mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); - - real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); - mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); - - if (VECTOR_MODE_P (mode)) - { - mthree = ix86_build_const_vector (SFmode, true, mthree); - mhalf = ix86_build_const_vector (SFmode, true, mhalf); - } - - /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) - rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ - - /* x0 = rsqrt(a) estimate */ - emit_insn (gen_rtx_SET (VOIDmode, x0, - gen_rtx_UNSPEC (mode, gen_rtvec (1, a), - UNSPEC_RSQRT))); - - /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ - if (!recip) - { - rtx zero, mask; - - zero = gen_reg_rtx (mode); - mask = gen_reg_rtx (mode); - - zero = force_reg (mode, CONST0_RTX(mode)); - emit_insn (gen_rtx_SET (VOIDmode, mask, - gen_rtx_NE (mode, zero, a))); - - emit_insn (gen_rtx_SET (VOIDmode, x0, - gen_rtx_AND (mode, x0, mask))); - } - - /* e0 = x0 * a */ - emit_insn (gen_rtx_SET (VOIDmode, e0, - gen_rtx_MULT (mode, x0, a))); - /* e1 = e0 * x0 */ - emit_insn (gen_rtx_SET (VOIDmode, e1, - gen_rtx_MULT (mode, e0, x0))); - - /* e2 = e1 - 3. */ - mthree = force_reg (mode, mthree); - emit_insn (gen_rtx_SET (VOIDmode, e2, - gen_rtx_PLUS (mode, e1, mthree))); - - mhalf = force_reg (mode, mhalf); - if (recip) - /* e3 = -.5 * x0 */ - emit_insn (gen_rtx_SET (VOIDmode, e3, - gen_rtx_MULT (mode, x0, mhalf))); - else - /* e3 = -.5 * e0 */ - emit_insn (gen_rtx_SET (VOIDmode, e3, - gen_rtx_MULT (mode, e0, mhalf))); - /* ret = e2 * e3 */ - emit_insn (gen_rtx_SET (VOIDmode, res, - gen_rtx_MULT (mode, e2, e3))); -} - -/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ - -static void ATTRIBUTE_UNUSED -i386_solaris_elf_named_section (const char *name, unsigned int flags, - tree decl) -{ - /* With Binutils 2.15, the "@unwind" marker must be specified on - every occurrence of the ".eh_frame" section, not just the first - one. */ - if (TARGET_64BIT - && strcmp (name, ".eh_frame") == 0) - { - fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, - flags & SECTION_WRITE ? "aw" : "a"); - return; - } - default_elf_asm_named_section (name, flags, decl); -} - -/* Return the mangling of TYPE if it is an extended fundamental type. */ - -static const char * -ix86_mangle_type (const_tree type) -{ - type = TYPE_MAIN_VARIANT (type); - - if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE - && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) - return NULL; - - switch (TYPE_MODE (type)) - { - case TFmode: - /* __float128 is "g". */ - return "g"; - case XFmode: - /* "long double" or __float80 is "e". */ - return "e"; - default: - return NULL; - } -} - -/* For 32-bit code we can save PIC register setup by using - __stack_chk_fail_local hidden function instead of calling - __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC - register, so it is better to call __stack_chk_fail directly. */ - -static tree -ix86_stack_protect_fail (void) -{ - return TARGET_64BIT - ? default_external_stack_protect_fail () - : default_hidden_stack_protect_fail (); -} - -/* Select a format to encode pointers in exception handling data. CODE - is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is - true if the symbol may be affected by dynamic relocations. - - ??? All x86 object file formats are capable of representing this. - After all, the relocation needed is the same as for the call insn. - Whether or not a particular assembler allows us to enter such, I - guess we'll have to see. */ -int -asm_preferred_eh_data_format (int code, int global) -{ - if (flag_pic) - { - int type = DW_EH_PE_sdata8; - if (!TARGET_64BIT - || ix86_cmodel == CM_SMALL_PIC - || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) - type = DW_EH_PE_sdata4; - return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; - } - if (ix86_cmodel == CM_SMALL - || (ix86_cmodel == CM_MEDIUM && code)) - return DW_EH_PE_udata4; - return DW_EH_PE_absptr; -} - -/* Expand copysign from SIGN to the positive value ABS_VALUE - storing in RESULT. If MASK is non-null, it shall be a mask to mask out - the sign-bit. */ -static void -ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) -{ - enum machine_mode mode = GET_MODE (sign); - rtx sgn = gen_reg_rtx (mode); - if (mask == NULL_RTX) - { - mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false); - if (!VECTOR_MODE_P (mode)) - { - /* We need to generate a scalar mode mask in this case. */ - rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); - tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); - mask = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, mask, tmp)); - } - } - else - mask = gen_rtx_NOT (mode, mask); - emit_insn (gen_rtx_SET (VOIDmode, sgn, - gen_rtx_AND (mode, mask, sign))); - emit_insn (gen_rtx_SET (VOIDmode, result, - gen_rtx_IOR (mode, abs_value, sgn))); -} - -/* Expand fabs (OP0) and return a new rtx that holds the result. The - mask for masking out the sign-bit is stored in *SMASK, if that is - non-null. */ -static rtx -ix86_expand_sse_fabs (rtx op0, rtx *smask) -{ - enum machine_mode mode = GET_MODE (op0); - rtx xa, mask; - - xa = gen_reg_rtx (mode); - mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true); - if (!VECTOR_MODE_P (mode)) - { - /* We need to generate a scalar mode mask in this case. */ - rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); - tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); - mask = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, mask, tmp)); - } - emit_insn (gen_rtx_SET (VOIDmode, xa, - gen_rtx_AND (mode, op0, mask))); - - if (smask) - *smask = mask; - - return xa; -} - -/* Expands a comparison of OP0 with OP1 using comparison code CODE, - swapping the operands if SWAP_OPERANDS is true. The expanded - code is a forward jump to a newly created label in case the - comparison is true. The generated label rtx is returned. */ -static rtx -ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, - bool swap_operands) -{ - rtx label, tmp; - - if (swap_operands) - { - tmp = op0; - op0 = op1; - op1 = tmp; - } - - label = gen_label_rtx (); - tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_COMPARE (CCFPUmode, op0, op1))); - tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); - tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); - JUMP_LABEL (tmp) = label; - - return label; -} - -/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 - using comparison code CODE. Operands are swapped for the comparison if - SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ -static rtx -ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, - bool swap_operands) -{ - enum machine_mode mode = GET_MODE (op0); - rtx mask = gen_reg_rtx (mode); - - if (swap_operands) - { - rtx tmp = op0; - op0 = op1; - op1 = tmp; - } - - if (mode == DFmode) - emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1, - gen_rtx_fmt_ee (code, mode, op0, op1))); - else - emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1, - gen_rtx_fmt_ee (code, mode, op0, op1))); - - return mask; -} - -/* Generate and return a rtx of mode MODE for 2**n where n is the number - of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ -static rtx -ix86_gen_TWO52 (enum machine_mode mode) -{ - REAL_VALUE_TYPE TWO52r; - rtx TWO52; - - real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23); - TWO52 = const_double_from_real_value (TWO52r, mode); - TWO52 = force_reg (mode, TWO52); - - return TWO52; -} - -/* Expand SSE sequence for computing lround from OP1 storing - into OP0. */ -void -ix86_expand_lround (rtx op0, rtx op1) -{ - /* C code for the stuff we're doing below: - tmp = op1 + copysign (nextafter (0.5, 0.0), op1) - return (long)tmp; - */ - enum machine_mode mode = GET_MODE (op1); - const struct real_format *fmt; - REAL_VALUE_TYPE pred_half, half_minus_pred_half; - rtx adj; - - /* load nextafter (0.5, 0.0) */ - fmt = REAL_MODE_FORMAT (mode); - real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); - REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); - - /* adj = copysign (0.5, op1) */ - adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); - ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); - - /* adj = op1 + adj */ - adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); - - /* op0 = (imode)adj */ - expand_fix (op0, adj, 0); -} - -/* Expand SSE2 sequence for computing lround from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) -{ - /* C code for the stuff we're doing below (for do_floor): - xi = (long)op1; - xi -= (double)xi > op1 ? 1 : 0; - return xi; - */ - enum machine_mode fmode = GET_MODE (op1); - enum machine_mode imode = GET_MODE (op0); - rtx ireg, freg, label, tmp; - - /* reg = (long)op1 */ - ireg = gen_reg_rtx (imode); - expand_fix (ireg, op1, 0); - - /* freg = (double)reg */ - freg = gen_reg_rtx (fmode); - expand_float (freg, ireg, 0); - - /* ireg = (freg > op1) ? ireg - 1 : ireg */ - label = ix86_expand_sse_compare_and_jump (UNLE, - freg, op1, !do_floor); - tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, - ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); - emit_move_insn (ireg, tmp); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (op0, ireg); -} - -/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the - result in OPERAND0. */ -void -ix86_expand_rint (rtx operand0, rtx operand1) -{ - /* C code for the stuff we're doing below: - xa = fabs (operand1); - if (!isless (xa, 2**52)) - return operand1; - xa = xa + 2**52 - 2**52; - return copysign (xa, operand1); - */ - enum machine_mode mode = GET_MODE (operand0); - rtx res, xa, label, TWO52, mask; - - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &mask); - - /* if (!isless (xa, TWO52)) goto label; */ - TWO52 = ix86_gen_TWO52 (mode); - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); - - ix86_sse_copysign_to_positive (res, xa, res, mask); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) -{ - /* C code for the stuff we expand below. - double xa = fabs (x), x2; - if (!isless (xa, TWO52)) - return x; - xa = xa + TWO52 - TWO52; - x2 = copysign (xa, x); - Compensate. Floor: - if (x2 > x) - x2 -= 1; - Compensate. Ceil: - if (x2 < x) - x2 -= -1; - return x2; - */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, TWO52, tmp, label, one, res, mask; - - TWO52 = ix86_gen_TWO52 (mode); - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &mask); - - /* if (!isless (xa, TWO52)) goto label; */ - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* xa = xa + TWO52 - TWO52; */ - xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); - - /* xa = copysign (xa, operand1) */ - ix86_sse_copysign_to_positive (xa, xa, res, mask); - - /* generate 1.0 or -1.0 */ - one = force_reg (mode, - const_double_from_real_value (do_floor - ? dconst1 : dconstm1, mode)); - - /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_AND (mode, one, tmp))); - /* We always need to subtract here to preserve signed zero. */ - tmp = expand_simple_binop (mode, MINUS, - xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); - emit_move_insn (res, tmp); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) -{ - /* C code for the stuff we expand below. - double xa = fabs (x), x2; - if (!isless (xa, TWO52)) - return x; - x2 = (double)(long)x; - Compensate. Floor: - if (x2 > x) - x2 -= 1; - Compensate. Ceil: - if (x2 < x) - x2 += 1; - if (HONOR_SIGNED_ZEROS (mode)) - return copysign (x2, x); - return x2; - */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, xi, TWO52, tmp, label, one, res, mask; - - TWO52 = ix86_gen_TWO52 (mode); - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &mask); - - /* if (!isless (xa, TWO52)) goto label; */ - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* xa = (double)(long)x */ - xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); - expand_fix (xi, res, 0); - expand_float (xa, xi, 0); - - /* generate 1.0 */ - one = force_reg (mode, const_double_from_real_value (dconst1, mode)); - - /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_AND (mode, one, tmp))); - tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, - xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); - emit_move_insn (res, tmp); - - if (HONOR_SIGNED_ZEROS (mode)) - ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE sequence for computing round from OPERAND1 storing - into OPERAND0. Sequence that works without relying on DImode truncation - via cvttsd2siq that is only available on 64bit targets. */ -void -ix86_expand_rounddf_32 (rtx operand0, rtx operand1) -{ - /* C code for the stuff we expand below. - double xa = fabs (x), xa2, x2; - if (!isless (xa, TWO52)) - return x; - Using the absolute value and copying back sign makes - -0.0 -> -0.0 correct. - xa2 = xa + TWO52 - TWO52; - Compensate. - dxa = xa2 - xa; - if (dxa <= -0.5) - xa2 += 1; - else if (dxa > 0.5) - xa2 -= 1; - x2 = copysign (xa2, x); - return x2; - */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask; - - TWO52 = ix86_gen_TWO52 (mode); - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &mask); - - /* if (!isless (xa, TWO52)) goto label; */ - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* xa2 = xa + TWO52 - TWO52; */ - xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); - - /* dxa = xa2 - xa; */ - dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); - - /* generate 0.5, 1.0 and -0.5 */ - half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); - one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); - mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, - 0, OPTAB_DIRECT); - - /* Compensate. */ - tmp = gen_reg_rtx (mode); - /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_AND (mode, one, tmp))); - xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); - /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ - tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_AND (mode, one, tmp))); - xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); - - /* res = copysign (xa2, operand1) */ - ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE sequence for computing trunc from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_trunc (rtx operand0, rtx operand1) -{ - /* C code for SSE variant we expand below. - double xa = fabs (x), x2; - if (!isless (xa, TWO52)) - return x; - x2 = (double)(long)x; - if (HONOR_SIGNED_ZEROS (mode)) - return copysign (x2, x); - return x2; - */ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, xi, TWO52, label, res, mask; - - TWO52 = ix86_gen_TWO52 (mode); - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &mask); - - /* if (!isless (xa, TWO52)) goto label; */ - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* x = (double)(long)x */ - xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); - expand_fix (xi, res, 0); - expand_float (res, xi, 0); - - if (HONOR_SIGNED_ZEROS (mode)) - ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE sequence for computing trunc from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_truncdf_32 (rtx operand0, rtx operand1) -{ - enum machine_mode mode = GET_MODE (operand0); - rtx xa, mask, TWO52, label, one, res, smask, tmp; - - /* C code for SSE variant we expand below. - double xa = fabs (x), x2; - if (!isless (xa, TWO52)) - return x; - xa2 = xa + TWO52 - TWO52; - Compensate: - if (xa2 > xa) - xa2 -= 1.0; - x2 = copysign (xa2, x); - return x2; - */ - - TWO52 = ix86_gen_TWO52 (mode); - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - /* xa = abs (operand1) */ - xa = ix86_expand_sse_fabs (res, &smask); - - /* if (!isless (xa, TWO52)) goto label; */ - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* res = xa + TWO52 - TWO52; */ - tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); - emit_move_insn (res, tmp); - - /* generate 1.0 */ - one = force_reg (mode, const_double_from_real_value (dconst1, mode)); - - /* Compensate: res = xa2 - (res > xa ? 1 : 0) */ - mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); - emit_insn (gen_rtx_SET (VOIDmode, mask, - gen_rtx_AND (mode, mask, one))); - tmp = expand_simple_binop (mode, MINUS, - res, mask, NULL_RTX, 0, OPTAB_DIRECT); - emit_move_insn (res, tmp); - - /* res = copysign (res, operand1) */ - ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - -/* Expand SSE sequence for computing round from OPERAND1 storing - into OPERAND0. */ -void -ix86_expand_round (rtx operand0, rtx operand1) -{ - /* C code for the stuff we're doing below: - double xa = fabs (x); - if (!isless (xa, TWO52)) - return x; - xa = (double)(long)(xa + nextafter (0.5, 0.0)); - return copysign (xa, x); - */ - enum machine_mode mode = GET_MODE (operand0); - rtx res, TWO52, xa, label, xi, half, mask; - const struct real_format *fmt; - REAL_VALUE_TYPE pred_half, half_minus_pred_half; - - /* Temporary for holding the result, initialized to the input - operand to ease control flow. */ - res = gen_reg_rtx (mode); - emit_move_insn (res, operand1); - - TWO52 = ix86_gen_TWO52 (mode); - xa = ix86_expand_sse_fabs (res, &mask); - label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - - /* load nextafter (0.5, 0.0) */ - fmt = REAL_MODE_FORMAT (mode); - real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); - REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); - - /* xa = xa + 0.5 */ - half = force_reg (mode, const_double_from_real_value (pred_half, mode)); - xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); - - /* xa = (double)(int64_t)xa */ - xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); - expand_fix (xi, xa, 0); - expand_float (xa, xi, 0); - - /* res = copysign (xa, operand1) */ - ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operand0, res); -} - - -/* Validate whether a SSE5 instruction is valid or not. - OPERANDS is the array of operands. - NUM is the number of operands. - USES_OC0 is true if the instruction uses OC0 and provides 4 variants. - NUM_MEMORY is the maximum number of memory operands to accept. - when COMMUTATIVE is set, operand 1 and 2 can be swapped. */ - -bool -ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num, - bool uses_oc0, int num_memory, bool commutative) -{ - int mem_mask; - int mem_count; - int i; - - /* Count the number of memory arguments */ - mem_mask = 0; - mem_count = 0; - for (i = 0; i < num; i++) - { - enum machine_mode mode = GET_MODE (operands[i]); - if (register_operand (operands[i], mode)) - ; - - else if (memory_operand (operands[i], mode)) - { - mem_mask |= (1 << i); - mem_count++; - } - - else - { - rtx pattern = PATTERN (insn); - - /* allow 0 for pcmov */ - if (GET_CODE (pattern) != SET - || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE - || i < 2 - || operands[i] != CONST0_RTX (mode)) - return false; - } - } - - /* Special case pmacsdq{l,h} where we allow the 3rd argument to be - a memory operation. */ - if (num_memory < 0) - { - num_memory = -num_memory; - if ((mem_mask & (1 << (num-1))) != 0) - { - mem_mask &= ~(1 << (num-1)); - mem_count--; - } - } - - /* If there were no memory operations, allow the insn */ - if (mem_mask == 0) - return true; - - /* Do not allow the destination register to be a memory operand. */ - else if (mem_mask & (1 << 0)) - return false; - - /* If there are too many memory operations, disallow the instruction. While - the hardware only allows 1 memory reference, before register allocation - for some insns, we allow two memory operations sometimes in order to allow - code like the following to be optimized: - - float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; } - - or similar cases that are vectorized into using the fmaddss - instruction. */ - else if (mem_count > num_memory) - return false; - - /* Don't allow more than one memory operation if not optimizing. */ - else if (mem_count > 1 && !optimize) - return false; - - else if (num == 4 && mem_count == 1) - { - /* formats (destination is the first argument), example fmaddss: - xmm1, xmm1, xmm2, xmm3/mem - xmm1, xmm1, xmm2/mem, xmm3 - xmm1, xmm2, xmm3/mem, xmm1 - xmm1, xmm2/mem, xmm3, xmm1 */ - if (uses_oc0) - return ((mem_mask == (1 << 1)) - || (mem_mask == (1 << 2)) - || (mem_mask == (1 << 3))); - - /* format, example pmacsdd: - xmm1, xmm2, xmm3/mem, xmm1 */ - if (commutative) - return (mem_mask == (1 << 2) || mem_mask == (1 << 1)); - else - return (mem_mask == (1 << 2)); - } - - else if (num == 4 && num_memory == 2) - { - /* If there are two memory operations, we can load one of the memory ops - into the destination register. This is for optimizing the - multiply/add ops, which the combiner has optimized both the multiply - and the add insns to have a memory operation. We have to be careful - that the destination doesn't overlap with the inputs. */ - rtx op0 = operands[0]; - - if (reg_mentioned_p (op0, operands[1]) - || reg_mentioned_p (op0, operands[2]) - || reg_mentioned_p (op0, operands[3])) - return false; - - /* formats (destination is the first argument), example fmaddss: - xmm1, xmm1, xmm2, xmm3/mem - xmm1, xmm1, xmm2/mem, xmm3 - xmm1, xmm2, xmm3/mem, xmm1 - xmm1, xmm2/mem, xmm3, xmm1 - - For the oc0 case, we will load either operands[1] or operands[3] into - operands[0], so any combination of 2 memory operands is ok. */ - if (uses_oc0) - return true; - - /* format, example pmacsdd: - xmm1, xmm2, xmm3/mem, xmm1 - - For the integer multiply/add instructions be more restrictive and - require operands[2] and operands[3] to be the memory operands. */ - if (commutative) - return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3))); - else - return (mem_mask == ((1 << 2) | (1 << 3))); - } - - else if (num == 3 && num_memory == 1) - { - /* formats, example protb: - xmm1, xmm2, xmm3/mem - xmm1, xmm2/mem, xmm3 */ - if (uses_oc0) - return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2))); - - /* format, example comeq: - xmm1, xmm2, xmm3/mem */ - else - return (mem_mask == (1 << 2)); - } - - else - gcc_unreachable (); - - return false; -} - - -/* Fixup an SSE5 instruction that has 2 memory input references into a form the - hardware will allow by using the destination register to load one of the - memory operations. Presently this is used by the multiply/add routines to - allow 2 memory references. */ - -void -ix86_expand_sse5_multiple_memory (rtx operands[], - int num, - enum machine_mode mode) -{ - rtx op0 = operands[0]; - if (num != 4 - || memory_operand (op0, mode) - || reg_mentioned_p (op0, operands[1]) - || reg_mentioned_p (op0, operands[2]) - || reg_mentioned_p (op0, operands[3])) - gcc_unreachable (); - - /* For 2 memory operands, pick either operands[1] or operands[3] to move into - the destination register. */ - if (memory_operand (operands[1], mode)) - { - emit_move_insn (op0, operands[1]); - operands[1] = op0; - } - else if (memory_operand (operands[3], mode)) - { - emit_move_insn (op0, operands[3]); - operands[3] = op0; - } - else - gcc_unreachable (); - - return; -} - - -/* Table of valid machine attributes. */ -static const struct attribute_spec ix86_attribute_table[] = -{ - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ - /* Stdcall attribute says callee is responsible for popping arguments - if they are not variable. */ - { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, - /* Fastcall attribute says callee is responsible for popping arguments - if they are not variable. */ - { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, - /* Cdecl attribute says the callee is a normal C declaration */ - { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, - /* Regparm attribute specifies how many integer arguments are to be - passed in registers. */ - { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, - /* Sseregparm attribute says we are using x86_64 calling conventions - for FP arguments. */ - { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, - /* force_align_arg_pointer says this function realigns the stack at entry. */ - { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, - false, true, true, ix86_handle_cconv_attribute }, -#if TARGET_DLLIMPORT_DECL_ATTRIBUTES - { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, - { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, - { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, -#endif - { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, - { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, -#ifdef SUBTARGET_ATTRIBUTE_TABLE - SUBTARGET_ATTRIBUTE_TABLE, -#endif - /* ms_abi and sysv_abi calling convention function attributes. */ - { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute }, - { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute }, - /* End element. */ - { NULL, 0, 0, false, false, false, NULL } -}; - -/* Implement targetm.vectorize.builtin_vectorization_cost. */ -static int -x86_builtin_vectorization_cost (bool runtime_test) -{ - /* If the branch of the runtime test is taken - i.e. - the vectorized - version is skipped - this incurs a misprediction cost (because the - vectorized version is expected to be the fall-through). So we subtract - the latency of a mispredicted branch from the costs that are incured - when the vectorized version is executed. - - TODO: The values in individual target tables have to be tuned or new - fields may be needed. For eg. on K8, the default branch path is the - not-taken path. If the taken path is predicted correctly, the minimum - penalty of going down the taken-path is 1 cycle. If the taken-path is - not predicted correctly, then the minimum penalty is 10 cycles. */ - - if (runtime_test) - { - return (-(ix86_cost->cond_taken_branch_cost)); - } - else - return 0; -} - -/* This function returns the calling abi specific va_list type node. - It returns the FNDECL specific va_list type. */ - -tree -ix86_fn_abi_va_list (tree fndecl) -{ - if (!TARGET_64BIT) - return va_list_type_node; - gcc_assert (fndecl != NULL_TREE); - - if (ix86_function_abi ((const_tree) fndecl) == MS_ABI) - return ms_va_list_type_node; - else - return sysv_va_list_type_node; -} - -/* Returns the canonical va_list type specified by TYPE. If there - is no valid TYPE provided, it return NULL_TREE. */ - -tree -ix86_canonical_va_list_type (tree type) -{ - tree wtype, htype; - - /* Resolve references and pointers to va_list type. */ - if (INDIRECT_REF_P (type)) - type = TREE_TYPE (type); - else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type))) - type = TREE_TYPE (type); - - if (TARGET_64BIT) - { - wtype = va_list_type_node; - gcc_assert (wtype != NULL_TREE); - htype = type; - if (TREE_CODE (wtype) == ARRAY_TYPE) - { - /* If va_list is an array type, the argument may have decayed - to a pointer type, e.g. by being passed to another function. - In that case, unwrap both types so that we can compare the - underlying records. */ - if (TREE_CODE (htype) == ARRAY_TYPE - || POINTER_TYPE_P (htype)) - { - wtype = TREE_TYPE (wtype); - htype = TREE_TYPE (htype); - } - } - if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype)) - return va_list_type_node; - wtype = sysv_va_list_type_node; - gcc_assert (wtype != NULL_TREE); - htype = type; - if (TREE_CODE (wtype) == ARRAY_TYPE) - { - /* If va_list is an array type, the argument may have decayed - to a pointer type, e.g. by being passed to another function. - In that case, unwrap both types so that we can compare the - underlying records. */ - if (TREE_CODE (htype) == ARRAY_TYPE - || POINTER_TYPE_P (htype)) - { - wtype = TREE_TYPE (wtype); - htype = TREE_TYPE (htype); - } - } - if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype)) - return sysv_va_list_type_node; - wtype = ms_va_list_type_node; - gcc_assert (wtype != NULL_TREE); - htype = type; - if (TREE_CODE (wtype) == ARRAY_TYPE) - { - /* If va_list is an array type, the argument may have decayed - to a pointer type, e.g. by being passed to another function. - In that case, unwrap both types so that we can compare the - underlying records. */ - if (TREE_CODE (htype) == ARRAY_TYPE - || POINTER_TYPE_P (htype)) - { - wtype = TREE_TYPE (wtype); - htype = TREE_TYPE (htype); - } - } - if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype)) - return ms_va_list_type_node; - return NULL_TREE; - } - return std_canonical_va_list_type (type); -} - -/* Iterate through the target-specific builtin types for va_list. - IDX denotes the iterator, *PTREE is set to the result type of - the va_list builtin, and *PNAME to its internal type. - Returns zero if there is no element for this index, otherwise - IDX should be increased upon the next call. - Note, do not iterate a base builtin's name like __builtin_va_list. - Used from c_common_nodes_and_builtins. */ - -int -ix86_enum_va_list (int idx, const char **pname, tree *ptree) -{ - if (!TARGET_64BIT) - return 0; - switch (idx) { - case 0: - *ptree = ms_va_list_type_node; - *pname = "__builtin_ms_va_list"; - break; - case 1: - *ptree = sysv_va_list_type_node; - *pname = "__builtin_sysv_va_list"; - break; - default: - return 0; - } - return 1; -} - -/* Initialize the GCC target structure. */ -#undef TARGET_RETURN_IN_MEMORY -#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory - -#undef TARGET_ATTRIBUTE_TABLE -#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table -#if TARGET_DLLIMPORT_DECL_ATTRIBUTES -# undef TARGET_MERGE_DECL_ATTRIBUTES -# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes -#endif - -#undef TARGET_COMP_TYPE_ATTRIBUTES -#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes - -#undef TARGET_INIT_BUILTINS -#define TARGET_INIT_BUILTINS ix86_init_builtins -#undef TARGET_EXPAND_BUILTIN -#define TARGET_EXPAND_BUILTIN ix86_expand_builtin - -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION -#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ - ix86_builtin_vectorized_function - -#undef TARGET_VECTORIZE_BUILTIN_CONVERSION -#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion - -#undef TARGET_BUILTIN_RECIPROCAL -#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal - -#undef TARGET_ASM_FUNCTION_EPILOGUE -#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue - -#undef TARGET_ENCODE_SECTION_INFO -#ifndef SUBTARGET_ENCODE_SECTION_INFO -#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info -#else -#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO -#endif - -#undef TARGET_ASM_OPEN_PAREN -#define TARGET_ASM_OPEN_PAREN "" -#undef TARGET_ASM_CLOSE_PAREN -#define TARGET_ASM_CLOSE_PAREN "" - -#undef TARGET_ASM_ALIGNED_HI_OP -#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT -#undef TARGET_ASM_ALIGNED_SI_OP -#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG -#ifdef ASM_QUAD -#undef TARGET_ASM_ALIGNED_DI_OP -#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD -#endif - -#undef TARGET_ASM_UNALIGNED_HI_OP -#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP -#undef TARGET_ASM_UNALIGNED_SI_OP -#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP -#undef TARGET_ASM_UNALIGNED_DI_OP -#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP - -#undef TARGET_SCHED_ADJUST_COST -#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost -#undef TARGET_SCHED_ISSUE_RATE -#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate -#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD -#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ - ia32_multipass_dfa_lookahead - -#undef TARGET_FUNCTION_OK_FOR_SIBCALL -#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall - -#ifdef HAVE_AS_TLS -#undef TARGET_HAVE_TLS -#define TARGET_HAVE_TLS true -#endif -#undef TARGET_CANNOT_FORCE_CONST_MEM -#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem -#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P -#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true - -#undef TARGET_DELEGITIMIZE_ADDRESS -#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address - -#undef TARGET_MS_BITFIELD_LAYOUT_P -#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p - -#if TARGET_MACHO -#undef TARGET_BINDS_LOCAL_P -#define TARGET_BINDS_LOCAL_P darwin_binds_local_p -#endif -#if TARGET_DLLIMPORT_DECL_ATTRIBUTES -#undef TARGET_BINDS_LOCAL_P -#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p -#endif - -#undef TARGET_ASM_OUTPUT_MI_THUNK -#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk -#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK -#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk - -#undef TARGET_ASM_FILE_START -#define TARGET_ASM_FILE_START x86_file_start - -#undef TARGET_DEFAULT_TARGET_FLAGS -#define TARGET_DEFAULT_TARGET_FLAGS \ - (TARGET_DEFAULT \ - | TARGET_SUBTARGET_DEFAULT \ - | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) - -#undef TARGET_HANDLE_OPTION -#define TARGET_HANDLE_OPTION ix86_handle_option - -#undef TARGET_RTX_COSTS -#define TARGET_RTX_COSTS ix86_rtx_costs -#undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST ix86_address_cost - -#undef TARGET_FIXED_CONDITION_CODE_REGS -#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs -#undef TARGET_CC_MODES_COMPATIBLE -#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible - -#undef TARGET_MACHINE_DEPENDENT_REORG -#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg - -#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE -#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value - -#undef TARGET_BUILD_BUILTIN_VA_LIST -#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list - -#undef TARGET_FN_ABI_VA_LIST -#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list - -#undef TARGET_CANONICAL_VA_LIST_TYPE -#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type - -#undef TARGET_EXPAND_BUILTIN_VA_START -#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start - -#undef TARGET_MD_ASM_CLOBBERS -#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers - -#undef TARGET_PROMOTE_PROTOTYPES -#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true -#undef TARGET_STRUCT_VALUE_RTX -#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx -#undef TARGET_SETUP_INCOMING_VARARGS -#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs -#undef TARGET_MUST_PASS_IN_STACK -#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack -#undef TARGET_PASS_BY_REFERENCE -#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference -#undef TARGET_INTERNAL_ARG_POINTER -#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer -#undef TARGET_UPDATE_STACK_BOUNDARY -#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary -#undef TARGET_GET_DRAP_RTX -#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx -#undef TARGET_STRICT_ARGUMENT_NAMING -#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true - -#undef TARGET_GIMPLIFY_VA_ARG_EXPR -#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg - -#undef TARGET_SCALAR_MODE_SUPPORTED_P -#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p - -#undef TARGET_VECTOR_MODE_SUPPORTED_P -#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p - -#undef TARGET_C_MODE_FOR_SUFFIX -#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix - -#ifdef HAVE_AS_TLS -#undef TARGET_ASM_OUTPUT_DWARF_DTPREL -#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel -#endif - -#ifdef SUBTARGET_INSERT_ATTRIBUTES -#undef TARGET_INSERT_ATTRIBUTES -#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES -#endif - -#undef TARGET_MANGLE_TYPE -#define TARGET_MANGLE_TYPE ix86_mangle_type - -#undef TARGET_STACK_PROTECT_FAIL -#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail - -#undef TARGET_FUNCTION_VALUE -#define TARGET_FUNCTION_VALUE ix86_function_value - -#undef TARGET_SECONDARY_RELOAD -#define TARGET_SECONDARY_RELOAD ix86_secondary_reload - -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST -#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost - -#undef TARGET_SET_CURRENT_FUNCTION -#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function - -#undef TARGET_OPTION_VALID_ATTRIBUTE_P -#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p - -#undef TARGET_OPTION_SAVE -#define TARGET_OPTION_SAVE ix86_function_specific_save - -#undef TARGET_OPTION_RESTORE -#define TARGET_OPTION_RESTORE ix86_function_specific_restore - -#undef TARGET_OPTION_PRINT -#define TARGET_OPTION_PRINT ix86_function_specific_print - -#undef TARGET_OPTION_CAN_INLINE_P -#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p - -#undef TARGET_EXPAND_TO_RTL_HOOK -#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi - -struct gcc_target targetm = TARGET_INITIALIZER; - -#include "gt-i386.h" |